def _my_fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method):
    """Fit ``estimator`` on one CV fold and predict on the held-out split.

    Returns the predictions (expanded to one column per class for
    probability/decision methods), the test indices, and the raw per-output
    R^2 scores of those predictions against the held-out targets.
    """
    # Restrict any sample-aligned fit parameters to the training indices.
    fit_params = {} if fit_params is None else fit_params
    fit_params = {key: _index_param_value(X, value, train)
                  for key, value in fit_params.items()}

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, Y_test = _safe_split(estimator, X, y, test, train)

    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)

    predictions = getattr(estimator, method)(X_test)

    if method in ('decision_function', 'predict_proba', 'predict_log_proba'):
        # The fold may not contain every class, so align the prediction
        # columns with the full set of classes observed in ``y``.
        n_classes = len(set(y))
        aligned = np.zeros((X_test.shape[0], n_classes))
        if method == 'decision_function' and len(estimator.classes_) == 2:
            # NOTE(review): assumes class labels are usable as column
            # indices (i.e. integers in [0, n_classes)) -- confirm callers.
            aligned[:, estimator.classes_[-1]] = predictions
        else:
            aligned[:, estimator.classes_] = predictions
        predictions = aligned

    scores = r2_score(y_true=Y_test, y_pred=predictions,
                      multioutput='raw_values')
    return predictions, test, scores
def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method,
                     sample_weights, objective):
    """Fit ``estimator`` on the train split and predict on the test split.

    Training uses ``sample_weights[train]`` either directly (as
    ``sample_weight``) or routed through a custom ``objective`` callable set
    on the estimator.  Returns ``(predictions, test)``.
    """
    # Adjust length of sample weights
    fit_params = {} if fit_params is None else fit_params
    fit_params = _check_fit_params(X, fit_params, train)

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, _ = _safe_split(estimator, X, y, test, train)

    if y_train is None:
        estimator.fit(X_train, **fit_params)
    elif objective is None:
        # Plain weighted fit.
        estimator.fit(X_train, y_train, sample_weight=sample_weights[train],
                      **fit_params)
    else:
        # Fold the per-sample weights into a custom training objective.
        estimator.set_params(
            **{'objective': lambda l, p: objective(l, p, sample_weights[train])})
        estimator.fit(X_train, y_train, **fit_params)

    predictions = getattr(estimator, method)(X_test)

    if method in ('decision_function', 'predict_proba', 'predict_log_proba'):
        if isinstance(predictions, list):
            # Multi-output prediction: align each label's columns separately.
            predictions = [
                _enforce_prediction_order(
                    estimator.classes_[i_label], predictions[i_label],
                    n_classes=len(set(y[:, i_label])), method=method)
                for i_label in range(len(predictions))]
        else:
            # A 2D y array should be a binary label indicator matrix
            n_classes = len(set(y)) if y.ndim == 1 else y.shape[1]
            predictions = _enforce_prediction_order(
                estimator.classes_, predictions, n_classes, method)

    return predictions, test
def _my_fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method):
    """Train on one fold, predict on its held-out part, and R^2-score it.

    Returns class-aligned predictions, the test index array, and per-output
    R^2 scores computed against the held-out targets.
    """
    if fit_params is None:
        fit_params = {}
    # Index sample-aligned fit parameters by the training subset.
    fit_params = dict((name, _index_param_value(X, val, train))
                      for name, val in fit_params.items())

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, Y_test = _safe_split(estimator, X, y, test, train)

    if y_train is not None:
        estimator.fit(X_train, y_train, **fit_params)
    else:
        estimator.fit(X_train, **fit_params)

    func = getattr(estimator, method)
    predictions = func(X_test)

    if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
        # Expand fold predictions to one column per class present in ``y``.
        n_classes = len(set(y))
        expanded = np.zeros((X_test.shape[0], n_classes))
        if method == 'decision_function' and len(estimator.classes_) == 2:
            # NOTE(review): relies on class labels doubling as column
            # indices -- verify against the calling convention.
            expanded[:, estimator.classes_[-1]] = predictions
        else:
            expanded[:, estimator.classes_] = predictions
        predictions = expanded

    scores = r2_score(y_true=Y_test, y_pred=predictions,
                      multioutput='raw_values')
    return predictions, test, scores
def _fe_fit_and_score(estimator, X, y, scorers, train, test, verbose,
                      parameters, fit_params, return_train_score=False,
                      return_parameters=False, fold_specific_X_extractor=None):
    """Fit one CV fold, optionally widening the train matrix with
    fold-specific features, and evaluate every scorer in ``scorers``.

    Returns ``[test_scores]``, optionally prefixed with ``train_scores`` and
    suffixed with ``parameters``.
    """
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = {} if fit_params is None else fit_params
    fit_params = {key: _index_param_value(X, val, train)
                  for key, val in fit_params.items()}

    if parameters is not None:
        estimator.set_params(**parameters)

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    if fold_specific_X_extractor:
        # extend by fold-specific features
        extra_cols = fold_specific_X_extractor(train)
        if extra_cols is not None:
            X_train = np.concatenate([X_train, extra_cols], axis=1)

    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)

    test_scores = [_score(estimator, X_test, y_test, scorer)
                   for scorer in scorers]
    if return_train_score:
        # Note: scored on the (possibly widened) training matrix.
        train_scores = [_score(estimator, X_train, y_train, scorer)
                        for scorer in scorers]

    if verbose > 2:
        msg += ", scores=%s" % test_scores
    if verbose > 1:
        print("[CV] %s %s" % ((64 - len(msg)) * '.', msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]
    if return_parameters:
        ret.append(parameters)
    return ret
def _rfa_single_fit(rfa, estimator, X, y, train, test, scorer):
    """Return the score for a fit across one fold."""
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    def fold_scorer(estimator, features):
        # Evaluate on the held-out fold restricted to the selected features.
        return _score(estimator, X_test[:, features], y_test, scorer)

    return rfa._fit(X_train, y_train, fold_scorer).scores_
def check_fit_idempotent(name, estimator_orig):
    """Check that fitting twice on the same data gives the same predictions.

    Ideally we would compare learned attributes (e.g. ``coefs_``), but a
    universal comparison for those is full of edge cases, so instead we
    compare the outputs of predict/transform/decision_function/predict_proba
    after a first and a second ``fit`` on identical data.
    """
    check_methods = ["predict", "transform", "decision_function",
                     "predict_proba"]
    rng = np.random.RandomState(0)

    # Non-deterministic estimators cannot pass this check by construction.
    if estimator_orig._get_tags()['non_deterministic']:
        msg = name + ' is non deterministic'
        raise SkipTest(msg)

    estimator = clone(estimator_orig)
    set_random_state(estimator)
    # warm_start would make the second fit depend on the first one.
    if 'warm_start' in estimator.get_params().keys():
        estimator.set_params(warm_start=False)

    n_samples = 100
    X, _ = _create_small_ts_dataset()
    X = X.reshape((X.shape[0], X.shape[1]))
    X = pairwise_estimator_convert_X(X, estimator)
    # NOTE(review): y is sized by n_samples while X comes from the small
    # dataset helper -- assumes both have 100 rows; confirm.
    if is_regressor(estimator_orig):
        y = rng.normal(size=n_samples)
    else:
        y = rng.randint(low=0, high=2, size=n_samples)

    train, test = next(ShuffleSplit(test_size=.2, random_state=rng).split(X))
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    # Fit for the first time and record every available output.
    estimator.fit(X_train, y_train)
    result = {m: getattr(estimator, m)(X_test)
              for m in check_methods if hasattr(estimator, m)}

    # Fit again and require near-identical outputs.
    set_random_state(estimator)
    estimator.fit(X_train, y_train)

    for method in check_methods:
        if not hasattr(estimator, method):
            continue
        new_result = getattr(estimator, method)(X_test)
        # Tolerance tracks the output's floating-point precision.
        if np.issubdtype(new_result.dtype, np.floating):
            tol = 2 * np.finfo(new_result.dtype).eps
        else:
            tol = 2 * np.finfo(np.float64).eps
        assert_allclose_dense_sparse(
            result[method], new_result,
            atol=max(tol, 1e-9), rtol=max(tol, 1e-7),
            err_msg="Idempotency check failed for method {}".format(method)
        )
def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
                   parameters, fit_params=None):
    """Fit one CV fold and return scores, sample counts and timing.

    Returns ``(test_score, n_samples_test, train_score, n_samples_train,
    scoring_time)``.  Raises ``RuntimeError`` if either split is empty.
    """
    if verbose > 1:
        if parameters is None:
            msg = "no parameters to be set"
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # An empty split cannot be fitted or scored -- fail loudly and early.
    if num_samples(train) == 0 or num_samples(test) == 0:
        raise RuntimeError(
            'Cross validation error in fit_estimator. The total data set '
            'contains %d elements, which were split into a training set '
            'of %d elements and a test set of %d elements. Unfortunately, '
            'you can\'t have a %s set with 0 elements.' % (
                num_samples(X), num_samples(train), num_samples(test),
                'training' if num_samples(train) == 0 else 'test'))

    # adjust length of sample weights
    n_samples = num_samples(X)
    fit_params = {} if fit_params is None else fit_params
    fit_params = {k: (np.asarray(v)[train]
                      if hasattr(v, '__len__') and len(v) == n_samples else v)
                  for k, v in fit_params.items()}

    if parameters is not None:
        estimator.set_params(**parameters)

    # fit and score
    start_time = time.time()
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)
    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)
    test_score = _score(estimator, X_test, y_test, scorer)
    train_score = _score(estimator, X_train, y_train, scorer)
    scoring_time = time.time() - start_time

    # msmbuilder-style estimators carry nested sample containers.
    msmbuilder_api = is_msmbuilder_estimator(estimator)
    n_samples_test = num_samples(X_test, is_nested=msmbuilder_api)
    n_samples_train = num_samples(X_train, is_nested=msmbuilder_api)

    if verbose > 2:
        msg += ", score=%f" % test_score
    if verbose > 1:
        end_msg = "%s -%s" % (msg, short_format_time(scoring_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    return (test_score, n_samples_test, train_score, n_samples_train,
            scoring_time)
def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
                   parameters, fit_params=None):
    """Fit ``estimator`` on one train/test split and score both splits.

    Returns a 5-tuple ``(test_score, n_samples_test, train_score,
    n_samples_train, scoring_time)``; empty splits raise ``RuntimeError``.
    """
    if verbose > 1:
        msg = ("no parameters to be set" if parameters is None
               else '%s' % (', '.join('%s=%s' % (k, v)
                                      for k, v in parameters.items())))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Reject degenerate splits before doing any work.
    if num_samples(train) == 0 or num_samples(test) == 0:
        raise RuntimeError(
            'Cross validation error in fit_estimator. The total data set '
            'contains %d elements, which were split into a training set '
            'of %d elements and a test set of %d elements. Unfortunately, '
            'you can\'t have a %s set with 0 elements.' % (
                num_samples(X), num_samples(train), num_samples(test),
                'training' if num_samples(train) == 0 else 'test'))

    # adjust length of sample weights
    n_samples = num_samples(X)
    if fit_params is None:
        fit_params = {}
    indexed_params = {}
    for key, value in fit_params.items():
        if hasattr(value, '__len__') and len(value) == n_samples:
            indexed_params[key] = np.asarray(value)[train]
        else:
            indexed_params[key] = value
    fit_params = indexed_params

    if parameters is not None:
        estimator.set_params(**parameters)

    # fit and score
    start_time = time.time()
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)
    if y_train is None:
        estimator.fit(X_train, **fit_params)
    else:
        estimator.fit(X_train, y_train, **fit_params)
    test_score = _score(estimator, X_test, y_test, scorer)
    train_score = _score(estimator, X_train, y_train, scorer)
    scoring_time = time.time() - start_time

    # Nested containers need special sample counting for msmbuilder APIs.
    msmbuilder_api = is_msmbuilder_estimator(estimator)
    n_samples_test = num_samples(X_test, is_nested=msmbuilder_api)
    n_samples_train = num_samples(X_train, is_nested=msmbuilder_api)

    if verbose > 2:
        msg += ", score=%f" % test_score
    if verbose > 1:
        end_msg = "%s -%s" % (msg, short_format_time(scoring_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    return (test_score, n_samples_test, train_score, n_samples_train,
            scoring_time)
def _rfe_train_test(rfe, estimator, X, y, train, test, scorer):
    """Return ``(train_score, test_score)`` for an RFE fit across one fold.

    Bug fix: the original swapped the two labels -- the value returned as
    ``train_score`` was computed by scoring on the TEST split and
    ``test_score`` by scoring on the TRAIN split.  Each score is now
    computed on the split its name claims.

    Parameters
    ----------
    rfe : RFE-like object exposing ``_fit(X, y, step_score)``.
    estimator : estimator used inside the RFE loop.
    X, y : full dataset; split by ``train``/``test`` index arrays.
    scorer : callable with signature ``scorer(estimator, X, y)``.
    """
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)
    # Score on the training data -> train_score.
    train_score = rfe._fit(
        X_train, y_train,
        lambda estimator, features: _score(
            estimator, X_train[:, features], y_train, scorer)).scores_
    # Score on the held-out data -> test_score.
    test_score = rfe._fit(
        X_train, y_train,
        lambda estimator, features: _score(
            estimator, X_test[:, features], y_test, scorer)).scores_
    return train_score, test_score
def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer):
    """Return the score for a fit across one fold."""
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    # Rebalance the training fold only; the test fold keeps its original
    # class distribution so scores reflect the real data.
    oversampler = SMOTE(ratio='minority', random_state=None, k=None,
                        k_neighbors=5, m=None, m_neighbors=10, out_step=0.5,
                        kind='regular', svm_estimator=None, n_jobs=1)
    X_train, y_train = oversampler.fit_sample(X_train, y_train)

    def fold_scorer(estimator, features):
        return _score(estimator, X_test[:, features], y_test, scorer)

    return rfe._fit(X_train, y_train, fold_scorer).scores_
def nested_fit_and_score(estimator, X, y, scorer, train, test, verbose=1,
                         parameters=None, fit_params=None,
                         return_train_score=False, return_times=False,
                         error_score='raise'):
    """Fit one CV fold, score it, and also report plain accuracy.

    Returns ``(ret, estimator)`` where ``ret`` maps ``'test'`` to
    ``{'score', 'accuracy'}`` and optionally carries ``'train'`` and
    ``'times'`` entries.

    Bug fix: on a fit failure with a numeric ``error_score``, ``acc_score``
    was never assigned but was still read by the ``verbose`` logging and by
    the returned dict, raising ``NameError``.  It is now set to
    ``error_score`` on that path.
    """
    from sklearn.externals.joblib.logger import short_format_time

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = {k: _index_param_value(X, v, train)
                  for k, v in fit_params.items()}

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    if verbose > 1:
        LOG.info(
            'CV iteration: Xtrain=%d, Ytrain=%d/%d -- Xtest=%d, Ytest=%d/%d.',
            len(X_train), len(X_train) - sum(y_train), sum(y_train),
            len(X_test), len(X_test) - sum(y_test), sum(y_test))

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)
    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            # FIX: acc_score must be defined on this path too -- it is read
            # below in the logging branch and in the returned dict.
            acc_score = error_score
            if return_train_score:
                train_score = error_score
            LOG.warning(
                "Classifier fit failed. The score on this train-test"
                " partition for these parameters will be set to %f. "
                "Details: \n%r", error_score, e)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")
    else:
        fit_time = time.time() - start_time
        test_score = None
        score_time = 0.0
        # Scoring is skipped when the test fold is single-class.
        if len(set(y_test)) > 1:
            test_score = _score(estimator, X_test, y_test, scorer)
            score_time = time.time() - start_time - fit_time
        else:
            LOG.warning(
                'Test set has no positive labels, scoring has been skipped '
                'in this loop.')
        if return_train_score:
            train_score = _score(estimator, X_train, y_train, scorer)
        acc_score = _score(estimator, X_test, y_test,
                           check_scoring(estimator, scoring='accuracy'))

    if verbose > 0:
        total_time = score_time + fit_time
        if test_score is not None:
            LOG.info('Iteration took %s, score=%f, accuracy=%f.',
                     short_format_time(total_time), test_score, acc_score)
        else:
            LOG.info('Iteration took %s, score=None, accuracy=%f.',
                     short_format_time(total_time), acc_score)

    ret = {'test': {'score': test_score, 'accuracy': acc_score}}
    if return_train_score:
        ret['train'] = {'score': train_score}
    if return_times:
        ret['times'] = [fit_time, score_time]
    return ret, estimator
def _model_fit_and_score(estimator_str, X, y, scorer, train, test, verbose,
                         parameters, fit_params, return_train_score=False,
                         return_parameters=False, return_n_test_samples=False,
                         return_times=False, error_score='raise'):
    """Build the named classifier, fit it on one CV fold, and score it.

    Returns ``[test_score]`` optionally extended with the train score, the
    number of test samples, fit/score times and ``(estimator_str,
    parameters)``.
    """
    if verbose > 1:
        msg = '[CV model=%s]' % estimator_str.upper()
        if parameters is not None:
            msg += ' %s' % (', '.join('%s=%s' % (k, v)
                                      for k, v in parameters.items()))
        LOG.info("%s %s", msg, (89 - len(msg)) * '.')

    # The estimator is built from its registry name, not passed in.
    estimator = _clf_build(estimator_str)

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = {k: _index_param_value(X, v, train)
                  for k, v in fit_params.items()}

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)
    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            warnings.warn("Classifier fit failed. The score on this train-test"
                          " partition for these parameters will be set to %f. "
                          "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")
    else:
        fit_time = time.time() - start_time
        scorer = check_scoring(estimator, scoring=scorer)
        test_score = _score(estimator, X_test, y_test, scorer)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_score = _score(estimator, X_train, y_train, scorer)

    if verbose > 2:
        msg += ", score=%f" % test_score
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        LOG.info(end_msg)

    ret = [train_score, test_score] if return_train_score else [test_score]
    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append((estimator_str, parameters))
    return ret
def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method):
    """Fit estimator and predict values for a given dataset split.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit' and 'predict'
        The object to use to fit the data.
    X : array-like of shape at least 2D
        The data to fit.
    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.
    train : array-like, shape (n_train_samples,)
        Indices of training samples.
    test : array-like, shape (n_test_samples,)
        Indices of test samples.
    verbose : integer
        The verbosity level.
    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.
    method : string
        Invokes the passed method name of the passed estimator.

    Returns
    -------
    predictions : sequence
        Result of calling 'estimator.method'
    test : array-like
        This is the value of the test parameter
    """
    # Adjust length of sample weights
    fit_params = {} if fit_params is None else fit_params
    fit_params = {key: _index_param_value(X, value, train)
                  for key, value in fit_params.items()}

    # the different part: estimators that accept an ``indices`` fit
    # argument receive the raw training index array as well.
    if "indices" in inspect.signature(estimator.fit).parameters:
        fit_params["indices"] = train

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, _ = _safe_split(estimator, X, y, test, train)

    if y_train is not None:
        estimator.fit(X_train, y_train, **fit_params)
    else:
        estimator.fit(X_train, **fit_params)

    predictions = getattr(estimator, method)(X_test)

    if method in ('decision_function', 'predict_proba', 'predict_log_proba'):
        # Expand to one column per class observed in ``y``.
        n_classes = len(set(y))
        expanded = np.zeros((X_test.shape[0], n_classes))
        if method == 'decision_function' and len(estimator.classes_) == 2:
            expanded[:, estimator.classes_[-1]] = predictions
        else:
            expanded[:, estimator.classes_] = predictions
        predictions = expanded

    return predictions, test
def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters,
                   fit_params, return_train_score=False,
                   return_parameters=False, return_n_test_samples=False,
                   return_times=False, error_score='raise'):
    """Fit estimator and compute scores for a given dataset split.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.
    X : array-like of shape at least 2D
        The data to fit.
    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.
    scorer : A single callable or dict mapping scorer name to the callable
        If it is a single callable, the return value for ``train_scores``
        and ``test_scores`` is a single float.  For a dict, it should be one
        mapping the scorer name to the scorer callable object / function
        with signature ``scorer(estimator, X, y)``.
    train : array-like, shape (n_train_samples,)
        Indices of training samples.
    test : array-like, shape (n_test_samples,)
        Indices of test samples.
    verbose : integer
        The verbosity level.
    error_score : 'raise' (default) or numeric
        Value to assign to the score if an error occurs in estimator
        fitting.  If set to 'raise', the error is raised.  If a numeric
        value is given, FitFailedWarning is raised.
    parameters : dict or None
        Parameters to be set on the estimator.
    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.
    return_train_score : boolean, optional, default: False
        Compute and return score on training set.
    return_parameters : boolean, optional, default: False
        Return parameters that has been used for the estimator.
    return_n_test_samples : boolean, optional, default: False
        Whether to return the ``n_test_samples``.
    return_times : boolean, optional, default: False
        Whether to return the fit/score times.

    Returns
    -------
    train_scores, test_scores, optionally n_test_samples, fit_time,
    score_time and parameters, as a list in that order.
    """
    if verbose > 1:
        if parameters is None:
            msg = ''
        else:
            msg = '%s' % (', '.join('%s=%s' % (k, v)
                                    for k, v in parameters.items()))
        print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))

    # Adjust length of sample weights
    fit_params = {} if fit_params is None else fit_params
    fit_params = {key: _index_param_value(X, value, train)
                  for key, value in fit_params.items()}

    # the different part: forward the raw training indices when supported.
    if "indices" in inspect.signature(estimator.fit).parameters:
        fit_params["indices"] = train

    test_scores = {}
    train_scores = {}
    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    # A dict of scorers means multimetric evaluation.
    is_multimetric = not callable(scorer)
    n_scorers = len(scorer.keys()) if is_multimetric else 1

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)
    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            if is_multimetric:
                test_scores = dict(zip(scorer.keys(),
                                       [error_score, ] * n_scorers))
                if return_train_score:
                    train_scores = dict(zip(scorer.keys(),
                                            [error_score, ] * n_scorers))
            else:
                test_scores = error_score
                if return_train_score:
                    train_scores = error_score
            warnings.warn("Classifier fit failed. The score on this train-test"
                          " partition for these parameters will be set to %f. "
                          "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")
    else:
        fit_time = time.time() - start_time
        # _score will return dict if is_multimetric is True
        test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_scores = _score(estimator, X_train, y_train, scorer,
                                  is_multimetric)

    if verbose > 2:
        if is_multimetric:
            for scorer_name, score in test_scores.items():
                msg += ", %s=%s" % (scorer_name, score)
        else:
            msg += ", score=%s" % test_scores
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg))

    ret = [train_scores, test_scores] if return_train_score else [test_scores]
    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(parameters)
    return ret
def nested_fit_and_score(
        estimator, X, y, scorer, train, test, verbose=1,
        parameters=None, fit_params=None, return_train_score=False,
        return_times=False, error_score='raise'):
    """Fit one CV fold, score it with ``scorer``, and report accuracy too.

    Returns ``(ret, estimator)`` where ``ret`` maps ``'test'`` to
    ``{'score', 'accuracy'}`` and optionally carries ``'train'`` and
    ``'times'`` entries.

    Bug fixes:
    * on a fit failure with a numeric ``error_score``, ``acc_score`` was
      never assigned but is read by the ``verbose`` logging and the
      returned dict, raising ``NameError``; it now defaults to
      ``error_score`` on that path.
    * deprecated ``LOG.warn`` replaced by ``LOG.warning`` (same output).
    """
    from sklearn.externals.joblib.logger import short_format_time

    # Adjust length of sample weights
    fit_params = fit_params if fit_params is not None else {}
    fit_params = {k: _index_param_value(X, v, train)
                  for k, v in fit_params.items()}

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    if verbose > 1:
        LOG.info('CV iteration: Xtrain=%d, Ytrain=%d/%d -- Xtest=%d, Ytest=%d/%d.',
                 len(X_train), len(X_train) - sum(y_train), sum(y_train),
                 len(X_test), len(X_test) - sum(y_test), sum(y_test))

    try:
        if y_train is None:
            estimator.fit(X_train, **fit_params)
        else:
            estimator.fit(X_train, y_train, **fit_params)
    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            # FIX: define acc_score here as well; it is consumed below.
            acc_score = error_score
            if return_train_score:
                train_score = error_score
            LOG.warning("Classifier fit failed. The score on this train-test"
                        " partition for these parameters will be set to %f. "
                        "Details: \n%r", error_score, e)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")
    else:
        fit_time = time.time() - start_time
        test_score = None
        score_time = 0.0
        # Skip the main scorer when the test fold contains a single class.
        if len(set(y_test)) > 1:
            test_score = _score(estimator, X_test, y_test, scorer)
            score_time = time.time() - start_time - fit_time
        else:
            LOG.warning('Test set has no positive labels, scoring has been skipped '
                        'in this loop.')
        if return_train_score:
            train_score = _score(estimator, X_train, y_train, scorer)
        acc_score = _score(estimator, X_test, y_test,
                           check_scoring(estimator, scoring='accuracy'))

    if verbose > 0:
        total_time = score_time + fit_time
        if test_score is not None:
            LOG.info('Iteration took %s, score=%f, accuracy=%f.',
                     short_format_time(total_time), test_score, acc_score)
        else:
            LOG.info('Iteration took %s, score=None, accuracy=%f.',
                     short_format_time(total_time), acc_score)

    ret = {
        'test': {'score': test_score, 'accuracy': acc_score}
    }
    if return_train_score:
        ret['train'] = {'score': train_score}
    if return_times:
        ret['times'] = [fit_time, score_time]
    return ret, estimator
def _model_fit_and_score(estimator_str, X, y, scorer, train, test, verbose,
                         parameters, fit_params, return_train_score=False,
                         return_parameters=False, return_n_test_samples=False,
                         return_times=False, error_score='raise'):
    """Instantiate the classifier named by ``estimator_str``, fit one CV
    fold and score it.

    Returns a list: ``[test_score]`` (optionally preceded by the train
    score) plus, when requested, the test-sample count, fit/score times and
    the ``(estimator_str, parameters)`` pair.
    """
    if verbose > 1:
        msg = '[CV model=%s]' % estimator_str.upper()
        if parameters is not None:
            param_desc = ', '.join('%s=%s' % (k, v)
                                   for k, v in parameters.items())
            msg += ' %s' % param_desc
        LOG.info("%s %s", msg, (89 - len(msg)) * '.')

    estimator = _clf_build(estimator_str)

    # Adjust length of sample weights
    if fit_params is None:
        fit_params = {}
    fit_params = dict((name, _index_param_value(X, val, train))
                      for name, val in fit_params.items())

    if parameters is not None:
        estimator.set_params(**parameters)

    start_time = time.time()
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    try:
        if y_train is not None:
            estimator.fit(X_train, y_train, **fit_params)
        else:
            estimator.fit(X_train, **fit_params)
    except Exception as e:
        # Note fit time as time until error
        fit_time = time.time() - start_time
        score_time = 0.0
        if error_score == 'raise':
            raise
        elif isinstance(error_score, numbers.Number):
            test_score = error_score
            if return_train_score:
                train_score = error_score
            warnings.warn("Classifier fit failed. The score on this train-test"
                          " partition for these parameters will be set to %f. "
                          "Details: \n%r" % (error_score, e), FitFailedWarning)
        else:
            raise ValueError("error_score must be the string 'raise' or a"
                             " numeric value. (Hint: if using 'raise', please"
                             " make sure that it has been spelled correctly.)")
    else:
        fit_time = time.time() - start_time
        scorer = check_scoring(estimator, scoring=scorer)
        test_score = _score(estimator, X_test, y_test, scorer)
        score_time = time.time() - start_time - fit_time
        if return_train_score:
            train_score = _score(estimator, X_train, y_train, scorer)

    if verbose > 2:
        msg += ", score=%f" % test_score
    if verbose > 1:
        total_time = score_time + fit_time
        end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time))
        LOG.info(end_msg)

    ret = [train_score, test_score] if return_train_score else [test_score]
    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append((estimator_str, parameters))
    return ret