def test_type_of_target():
    """Exercise ``type_of_target`` on valid, invalid and legacy inputs.

    Checks, in order: every labelled example group, inputs that are not
    array-like, legacy sequence-of-sequences multilabel data, and a pandas
    ``SparseSeries`` (the test is skipped if that class cannot be imported).
    """
    # Each example must be classified as the group it is filed under.
    for expected, samples in EXAMPLES.items():
        for sample in samples:
            failure = ('type_of_target(%r) should be %r, got %r'
                       % (sample, expected, type_of_target(sample)))
            assert_equal(type_of_target(sample), expected, msg=failure)

    # Non array-like inputs are rejected with a descriptive message.
    not_array_like = r'Expected array-like \(array or non-string sequence\).*'
    for sample in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises_regex(ValueError, not_array_like, type_of_target, sample)

    # The legacy multilabel representation is rejected as well.
    legacy_msg = ('You appear to be using a legacy multi-label data '
                  'representation. Sequence of sequences are no longer supported;'
                  ' use a binary array or sparse matrix instead.')
    for sample in MULTILABEL_SEQUENCES:
        assert_raises_regex(ValueError, legacy_msg, type_of_target, sample)

    try:
        from pandas import SparseSeries
    except ImportError:
        raise SkipTest("Pandas not found")

    sparse_y = SparseSeries([1, 0, 0, 1, 0])
    assert_raises_regex(ValueError, "y cannot be class 'SparseSeries'.",
                        type_of_target, sparse_y)
def fit(self, X, y):
    """Run the parent ``fit`` and warn on unexpected target types.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : ndarray, shape (n_samples, )
        Corresponding label for each sample in X.

    Returns
    -------
    self : object,
        Return self.

    """
    super(BaseMulticlassSampler, self).fit(X, y)

    # Anything other than a binary or multiclass target is reported with a
    # warning; it is not treated as an error.
    target_kind = type_of_target(y)
    if target_kind not in ('binary', 'multiclass'):
        # NOTE: this changes the process-wide warning filter so that the
        # UserWarning below is shown on every call.
        warnings.simplefilter('always', UserWarning)
        warnings.warn('The target type should be binary or multiclass.')

    return self
def test_type_of_target():
    """Check ``type_of_target`` on labelled examples and invalid inputs."""
    # Each example must be classified as the group it is filed under.
    for expected, samples in iteritems(EXAMPLES):
        for sample in samples:
            failure = ('type_of_target(%r) should be %r, got %r'
                       % (sample, expected, type_of_target(sample)))
            assert_equal(type_of_target(sample), expected, msg=failure)

    # Inputs that are not array-like must raise ValueError.
    for sample in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises(ValueError, type_of_target, sample)
def _check_targets_hmc(y_true, y_pred):
    """Validate a (y_true, y_pred) pair and return both as 1d arrays.

    The two targets must have consistent lengths. Their combined target
    kind must resolve to multiclass: a binary/multiclass mix is promoted to
    multiclass, every other combination is rejected.

    Parameters
    ----------
    y_true : array-like
    y_pred : array-like

    Returns
    -------
    y_true, y_pred : the inputs passed through ``column_or_1d``.

    Raises
    ------
    ValueError
        If the combined target kind is anything other than multiclass.
    """
    check_consistent_length(y_true, y_pred)

    kinds = {type_of_target(y_true), type_of_target(y_pred)}
    # A mix of binary and multiclass is handled as plain multiclass.
    if kinds == {"binary", "multiclass"}:
        kinds = {"multiclass"}

    if kinds != {"multiclass"}:
        raise ValueError("{0} is not supported".format(kinds))

    return column_or_1d(y_true), column_or_1d(y_pred)
def _check_clf_targets(y_true, y_pred):
    """Check that y_true and y_pred belong to the same classification task.

    Binary and multiclass targets are brought to a common 1d shape. A
    ValueError is raised when the two targets are of different kinds (a
    binary/multiclass mix excepted, which is treated as multiclass) or when
    the common kind is not one of the supported classification kinds.

    Parameters
    ----------
    y_true : array-like

    y_pred : array-like

    Returns
    -------
    type_true : one of {'multilabel-indicator', 'multilabel-sequences',
        'multiclass', 'binary'}
        The common target kind, as reported by
        ``utils.multiclass.type_of_target``.

    y_true : array or indicator matrix or sequence of sequences

    y_pred : array or indicator matrix or sequence of sequences
    """
    y_true, y_pred = check_arrays(y_true, y_pred, allow_lists=True)

    type_true = type_of_target(y_true)
    type_pred = type_of_target(y_pred)

    kinds = {type_true, type_pred}
    # A binary target next to a multiclass one is handled as multiclass.
    if kinds == {"binary", "multiclass"}:
        kinds = {"multiclass"}

    if len(kinds) > 1:
        raise ValueError("Can't handle mix of {0} and {1}"
                         "".format(type_true, type_pred))

    # Exactly one kind is left, so unpack it from the set.
    (kind,) = kinds

    # No metric supports the "multiclass-multioutput" format.
    supported = ("binary", "multiclass", "multilabel-indicator",
                 "multilabel-sequences")
    if kind not in supported:
        raise ValueError("{0} is not supported".format(kind))

    if kind in ("binary", "multiclass"):
        y_true = column_or_1d(y_true)
        y_pred = column_or_1d(y_pred)

    return kind, y_true, y_pred
def fit(self, X, y):
    """Run the parent ``fit`` and warn when the target is not binary.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : ndarray, shape (n_samples, )
        Corresponding label for each sample in X.

    Returns
    -------
    self : object,
        Return self.

    """
    super(BaseBinarySampler, self).fit(X, y)

    # A non-binary target is only reported with a warning, not rejected.
    if type_of_target(y) != 'binary':
        warnings.warn('The target type should be binary.')

    return self
def check_target_type(y, indicate_one_vs_all=False):
    """Check the target type and decode one-vs-all encoded targets.

    A ``'multilabel-indicator'`` target is accepted only when each row has
    at most one positive entry; it is then converted to class indices with
    ``argmax``. Any other target is returned unchanged.

    Parameters
    ----------
    y : ndarray,
        The array containing the target.

    indicate_one_vs_all : bool, optional
        Whether to also return a flag telling if the target was encoded in
        a one-vs-all fashion.

    Returns
    -------
    y : ndarray,
        The returned target.

    is_one_vs_all : bool, optional
        Indicate if the target was originally encoded in a one-vs-all
        fashion. Only returned if ``indicate_one_vs_all=True``.

    Raises
    ------
    ValueError
        If ``y`` is a multilabel indicator with more than one positive
        entry in some row.
    """
    kind = type_of_target(y)
    is_one_vs_all = kind == 'multilabel-indicator'

    if is_one_vs_all:
        # More than a single 1 in a row is a genuine multilabel target.
        if np.any(y.sum(axis=1) > 1):
            raise ValueError(
                "When 'y' corresponds to '{}', 'y' should encode the "
                "multiclass (a single 1 by row).".format(kind))
        y = y.argmax(axis=1)

    if indicate_one_vs_all:
        return y, is_one_vs_all
    return y
def test_type_of_target():
    """Check ``type_of_target`` on valid, invalid and legacy inputs.

    Checks, in order: every labelled example group, inputs that are not
    array-like, and legacy sequence-of-sequences multilabel data.
    """
    for group, group_examples in iteritems(EXAMPLES):
        for example in group_examples:
            assert_equal(type_of_target(example), group,
                         msg=('type_of_target(%r) should be %r, got %r'
                              % (example, group, type_of_target(example))))

    # Raw string: the pattern contains ``\(`` and ``\)``, which are invalid
    # escape sequences in a normal string literal.
    msg_regex = r'Expected array-like \(array or non-string sequence\).*'
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises_regex(ValueError, msg_regex, type_of_target, example)

    msg = ('You appear to be using a legacy multi-label data '
           'representation. Sequence of sequences are no longer supported;'
           ' use a binary array or sparse matrix instead.')
    for example in MULTILABEL_SEQUENCES:
        assert_raises_regex(ValueError, msg, type_of_target, example)
def _posibility(self, x, tag, event=1):
    """Compute per-value event / non-event rates of a discrete feature.

    Parameters:
    ----------
    x (Sequence): - discrete feature values
    tag (Sequence): - label sequence used for training
    event (any): - label value that counts as the event

    Returns:
    ----------
    Dict[value, Tuple[rate_event, rate_non_event]]: - for every distinct
        non-null value of ``x``, the share of all event samples and the
        share of all non-event samples that carry this value

    Raises:
    ----------
    AttributeError: - if ``tag`` is not a binary target
    """
    if type_of_target(tag) not in ['binary']:
        raise AttributeError("tag must be a binary array")
    # NOTE: a check rejecting continuous ``x`` exists only as commented-out
    # code in the original; it is not enforced here.

    labels = np.array(tag)
    values = np.array(x)

    # Totals over the whole sample, used as denominators below.
    event_total = (labels == event).sum()
    non_event_total = labels.shape[-1] - event_total

    distinct_values = pd.unique(values[pd.notnull(values)])

    pos_dic = {}
    for level in distinct_values:
        # When ``level`` is NaN the selection below is empty as well.
        level_labels = labels[np.where(values == level)[0]]
        n_event = (level_labels == event).sum()
        n_non_event = level_labels.shape[-1] - n_event
        pos_dic[level] = (1.0 * n_event / event_total,
                          1.0 * n_non_event / non_event_total)
    return pos_dic
def _sampling_strategy_float(sampling_strategy, y, sampling_type):
    """Turn a float ratio into per-class sample counts (binary targets only).

    Parameters
    ----------
    sampling_strategy : float
        Ratio between the minority and the majority class sizes.

    y : array-like
        The target; must be of binary type.

    sampling_type : str
        ``'over-sampling'`` or ``'under-sampling'``.

    Returns
    -------
    sampling_strategy_ : dict
        For over-sampling, maps every class but the majority one to the
        number of samples to add. For under-sampling, maps every class but
        the minority one to the number of samples to keep.

    Raises
    ------
    ValueError
        If ``y`` is not binary, or if ``sampling_type`` is neither
        over- nor under-sampling.
    """
    type_y = type_of_target(y)
    if type_y != 'binary':
        raise ValueError(
            '"sampling_strategy" can be a float only when the type '
            'of target is binary. For multi-class, use a dict.')

    target_stats = Counter(y)
    if sampling_type == 'over-sampling':
        n_sample_majority = max(target_stats.values())
        class_majority = max(target_stats, key=target_stats.get)
        sampling_strategy_ = {
            key: int(n_sample_majority * sampling_strategy - value)
            for (key, value) in target_stats.items()
            if key != class_majority
        }
    elif sampling_type == 'under-sampling':
        n_sample_minority = min(target_stats.values())
        class_minority = min(target_stats, key=target_stats.get)
        sampling_strategy_ = {
            key: int(n_sample_minority / sampling_strategy)
            for key in target_stats
            if key != class_minority
        }
    else:
        # The previous message said "do let", i.e. the opposite of the
        # condition that is being reported.
        raise ValueError("'clean-sampling' methods do not let the user "
                         "specify the sampling ratio.")

    return sampling_strategy_
def cross_val_score_one_vs_all_per_class(estimator, X, y=None, *args, **kargs):
    """Cross-validate an estimator, per label column for one-vs-rest models.

    When ``y`` is a multilabel target and ``estimator`` is a
    ``OneVsRestClassifier``, a deep copy of the wrapped base estimator is
    cross-validated separately against every column of ``y``. In every
    other case the estimator is cross-validated once against ``y``.

    Parameters
    ----------
    estimator : estimator object
    X : array-like
        Training data.
    y : array-like, optional
        Target; for the per-column path it must provide ``transpose()``.
    *args, **kargs
        Forwarded unchanged to ``_cross_val_score``.

    Returns
    -------
    ndarray
        The collected scores as an array.
    """
    # NOTE: an earlier version assembled a "too little / too many examples"
    # diagnostic here that was never raised or reported, and left a
    # ``pdb.set_trace()`` in the multilabel path; both have been removed.
    y_type = type_of_target(y)

    if y_type.startswith("multilabel") and isinstance(estimator, OneVsRestClassifier):
        # Score the base estimator independently on each label column.
        res = [
            _cross_val_score(deepcopy(estimator.estimator), X, yy, *args, **kargs)
            for yy in y.transpose()
        ]
    else:
        res = _cross_val_score(estimator, X, y, *args, **kargs)

    return np.array(list(res))
def fit(self, X, y):
    """Fit MLP Classifier according to X, y

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples
        and n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_classes]
        Target values. The problem type is inferred from ``y`` with
        ``type_of_target`` and stored in ``self.type_of_target_``.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If ``self.type_of_y`` is set and differs from the inferred type.
    """
    X, = check_arrays(X, sparse_format='dense')
    # Unpacking into two names also enforces that X is two-dimensional.
    _, self.input_size_ = X.shape

    y = np.atleast_1d(y)
    self.type_of_target_ = type_of_target(y)
    if self.verbose > 0:
        print("The inferred type of y is %s" % self.type_of_target_)

    # An explicitly requested target type must agree with the inferred one.
    if self.type_of_y is not None and self.type_of_y != self.type_of_target_:
        raise ValueError("Passed type of y is %s, inferred type is %s"
                         % (self.type_of_y, self.type_of_target_))

    self.check_type_implemented()
    y = self._get_output(y)
    X, y = self._scale(X, y)
    self._inst_mlp()
    self._fit_mlp(X, y)

    if self.dropout and self.type_of_target_ in ['continuous',
                                                 'continuous-multioutput']:
        self._lineregress(X, y)

    return self
def check_target_binary(self, y):
    '''
    Ensure the target variable is binary.

    :param y: target values to inspect
    :return: None
    :raises ValueError: if ``type_of_target(y)`` is not ``'binary'``
    '''
    if type_of_target(y) != 'binary':
        raise ValueError('Label type must be binary')
def check_samplers_multiclass_ova(name, Sampler):
    """Check that a multiclass target and its one-vs-all encoding agree.

    The sampler is run once on the multiclass labels and once on their
    binarized (one-vs-all) form; the resampled features must match and the
    resampled one-vs-all target must decode to the multiclass result.
    """
    X, y = make_classification(n_samples=1000, n_classes=3,
                               n_informative=4, weights=[0.2, 0.3, 0.5],
                               random_state=0)
    # ``classes`` is passed by keyword: it is keyword-only in current
    # scikit-learn releases and accepted by keyword in older ones.
    y_ova = label_binarize(y, classes=np.unique(y))

    sampler = Sampler()
    # FIXME: in 0.6 set the random_state for all
    if name not in DONT_HAVE_RANDOM_STATE:
        set_random_state(sampler)

    X_res, y_res = sampler.fit_resample(X, y)
    X_res_ova, y_res_ova = sampler.fit_resample(X, y_ova)
    assert_allclose(X_res, X_res_ova)

    if issubclass(Sampler, BaseEnsembleSampler):
        # Ensemble samplers return one target per batch.
        for batch_y, batch_y_ova in zip(y_res, y_res_ova):
            assert type_of_target(batch_y_ova) == type_of_target(y_ova)
            assert_allclose(batch_y, batch_y_ova.argmax(axis=1))
    else:
        assert type_of_target(y_res_ova) == type_of_target(y_ova)
        assert_allclose(y_res, y_res_ova.argmax(axis=1))
def _check_cv(cv=3, y=None, classifier=False, **kwargs): """Input checker utility for building a cross-validator. Parameters ---------- cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross-validation, - integer, to specify the number of folds. - An object to be used as a cross-validation generator. - An iterable yielding train/test splits. For integer/None inputs, if classifier is True and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. y : array-like, optional The target variable for supervised learning problems. classifier : boolean, optional, default False Whether the task is a classification task, in which case stratified KFold will be used. kwargs : dict Other parameters for StratifiedShuffleSplit or ShuffleSplit. Returns ------- checked_cv : a cross-validator instance. The return value is a cross-validator which generates the train/test splits via the ``split`` method. """ if cv is None: cv = kwargs.pop('n_splits', 0) or 10 if isinstance(cv, numbers.Integral): if (classifier and (y is not None) and (type_of_target(y) in ('binary', 'multiclass'))): return StratifiedShuffleSplit(cv, **kwargs) else: return ShuffleSplit(cv, **kwargs) if not hasattr(cv, 'split') or isinstance(cv, str): if not isinstance(cv, Iterable) or isinstance(cv, str): raise ValueError("Expected cv as an integer, cross-validation " "object (from sklearn.model_selection) " "or an iterable. Got %s." % cv) return _CVIterableWrapper(cv) return cv # New style cv objects are passed without any modification
def check_averaging(name, y_true, y_true_binarize, y_pred, y_pred_binarize, y_score):
    """Run the averaging checks for the metric registered under ``name``.

    Metrics listed in ``METRICS_WITH_AVERAGING`` are checked on the
    predictions; metrics listed in ``THRESHOLDED_METRICS_WITH_AVERAGING``
    are checked on the scores.

    Raises
    ------
    ValueError
        If the metric is in neither of the two registries.
    """
    is_multilabel = type_of_target(y_true).startswith("multilabel")
    metric = ALL_METRICS[name]

    if name in METRICS_WITH_AVERAGING:
        predicted, predicted_binarize = y_pred, y_pred_binarize
    elif name in THRESHOLDED_METRICS_WITH_AVERAGING:
        # Thresholded metrics use the scores in both positions.
        predicted, predicted_binarize = y_score, y_score
    else:
        raise ValueError("Metric is not recorded as having an average option")

    _check_averaging(metric, y_true, predicted, y_true_binarize,
                     predicted_binarize, is_multilabel)
def _validate_target(self, y): """ Raises a value error if the target is not a classification target. """ # Ignore None values if y is None: return y_type = type_of_target(y) if y_type not in ("binary", "multiclass"): raise YellowbrickValueError(( "'{}' target type not supported, only binary and multiclass" ).format(y_type))
def woe(X, y, event=1):
    """Compute the information value of every column of ``X``.

    Columns detected as continuous are discretized with ``discrete`` before
    ``woe_single_x`` is applied.

    Parameters
    ----------
    X : object with ``columns`` and column access by name
        Feature table (a pandas DataFrame, presumably -- confirm with callers).
    y : target passed through to ``woe_single_x``.
    event : label value treated as the event, passed to ``woe_single_x``.

    Returns
    -------
    dict
        Maps each column name to the information value returned for it.
    """
    # NOTE(review): the per-feature WOE dicts are collected but never
    # returned; only the information values leave this function.
    collected_woe = []
    iv_by_feature = {}
    for name in X.columns:
        column = X[name].values
        # A continuous column has to be discretized first.
        if type_of_target(column) == 'continuous':
            column = discrete(column)
        feature_woe, feature_iv = woe_single_x(column, y, name, event)
        iv_by_feature[name] = feature_iv
        collected_woe.append(feature_woe)
    return iv_by_feature
def check_target_type(y):
    """Warn when the target type is not one the samplers support.

    The supported kinds are those listed in ``TARGET_KIND``.

    Parameters
    ----------
    y : ndarray,
        The array containing the target

    Returns
    -------
    y : ndarray,
        The target, unchanged.

    """
    kind = type_of_target(y)
    if kind not in TARGET_KIND:
        # FIXME: perfectly we should raise an error but the sklearn API does
        # not allow for it
        warnings.warn("'y' should be of types {} only. Got {} instead.".format(
            TARGET_KIND, kind))
    return y
def check_target_type(y, indicate_one_vs_all=False):
    """Check the target type and decode one-vs-all encoded targets.

    A warning is issued when the target kind is not listed in
    ``TARGET_KIND``. A ``'multilabel-indicator'`` target is converted to
    class indices with ``argmax``; any other target is returned unchanged.

    Parameters
    ----------
    y : ndarray,
        The array containing the target.

    indicate_one_vs_all : bool, optional
        Whether to also return a flag telling if the target was encoded in
        a one-vs-all fashion.

    Returns
    -------
    y : ndarray,
        The returned target.

    is_one_vs_all : bool, optional
        Indicate if the target was originally encoded in a one-vs-all
        fashion. Only returned if ``indicate_one_vs_all=True``.

    """
    kind = type_of_target(y)
    if kind not in TARGET_KIND:
        # FIXME: perfectly we should raise an error but the sklearn API does
        # not allow for it
        warnings.warn("'y' should be of types {} only. Got {} instead.".format(
            TARGET_KIND, kind))

    is_one_vs_all = kind == 'multilabel-indicator'
    target = y.argmax(axis=1) if is_one_vs_all else y

    if indicate_one_vs_all:
        return target, is_one_vs_all
    return target
def feature_discretion(self, X):
    '''
    Discretize the continuous columns of X and keep the others unchanged.

    :param X: numpy array, indexed as ``X[:, i]`` per feature
    :return: numpy array in which every column detected as continuous has
             been replaced by the output of ``self.discrete``
    '''
    columns = []
    for idx in range(X.shape[-1]):
        column = X[:, idx]
        if type_of_target(column) == 'continuous':
            column = self.discrete(column)
        columns.append(column)
    # Columns were collected as rows, so transpose back.
    return np.array(columns).T
def is_cat(s: pd.Series, consider_ordinal_as_cat):
    """Tell whether ``s`` should be treated as a categorical column.

    A column is categorical as soon as it holds an element that is neither
    a ``float`` nor an ``int``. A purely numeric column is categorical only
    when ``consider_ordinal_as_cat`` is set and, after dropping missing
    values, ``type_of_target`` reports it as multiclass.
    """
    # Any non-numeric element settles the question immediately.
    if any(not isinstance(elem, (float, int)) for elem in s):
        return True

    if not consider_ordinal_as_cat:
        return False

    if isinstance(s, np.ndarray):
        s = pd.Series(s)
    s = s.dropna()
    # Object columns that got here hold only numbers; give them a numeric
    # dtype before the target type is inferred.
    if s.dtype == object:
        s = s.astype('float32')
    return type_of_target(s) in ("multiclass",)
def test_regression_conversion(self):
    """
    Makes sure that a regression input properly retains the
    continuous target type
    """
    values = [1.0, 76.9, 123, 4.0, 81.1]
    # The same target as a list, a numpy array and a pandas DataFrame.
    candidates = (list(values), np.array(values), pd.DataFrame(values))

    for candidate in candidates:
        validator = InputValidator()
        y_train = validator.validate_target(
            candidate,
            is_classification=False,
        )
        self.assertEqual('continuous', type_of_target(y_train))
def _encode_class_labels(self, y):
    """
    Fit the internal label encoder and return encoded labels.

    The inferred target type is stored in ``self.type_of_target_``. A
    ValueError is raised unless it is binary or multiclass.
    """
    self.type_of_target_ = type_of_target(y)

    if self.type_of_target_ not in ("binary", "multiclass"):
        msg = ("CascadeForestClassifier is used for binary and multiclass"
               " classification, wheras the training labels seem not to"
               " be any one of them.")
        raise ValueError(msg)

    self.labels_are_encoded = True
    self.label_encoder_ = LabelEncoder()
    return self.label_encoder_.fit_transform(y)
def check_cv(
    cv: Union[int, Iterable, BaseCrossValidator] = 5,
    y: Optional[Union[pd.Series, np.ndarray]] = None,
    stratified: bool = False,
    random_state: int = 0,
):
    """Build a shuffled cross-validator from ``cv``.

    Parameters
    ----------
    cv : int, iterable or cross-validator, default 5
        ``None`` is treated as 5. An integer gives the number of folds of
        a shuffled ``KFold`` / ``StratifiedKFold``. Anything else is handed
        to ``sklearn.model_selection.check_cv``.
    y : optional target, used to decide whether stratification applies.
    stratified : bool
        Request stratified folds; honoured for integer ``cv`` only when
        ``y`` is given and is a binary or multiclass target.
    random_state : int
        Seed for the shuffled splitters built from an integer ``cv``.

    Returns
    -------
    A cross-validator object.
    """
    if cv is None:
        cv = 5

    if isinstance(cv, numbers.Integral):
        if (stratified and (y is not None)
                and (type_of_target(y) in ("binary", "multiclass"))):
            return StratifiedKFold(cv, shuffle=True, random_state=random_state)
        return KFold(cv, shuffle=True, random_state=random_state)

    # ``classifier`` is keyword-only in current scikit-learn, so it must not
    # be passed positionally.
    return model_selection.check_cv(cv, y, classifier=stratified)
def check_binarized_results(y, classes, pos_label, neg_label, expected):
    """Check ``label_binarize`` / ``LabelBinarizer`` against ``expected``.

    Both dense and sparse outputs are exercised. Sparse output combined
    with ``pos_label == 0`` or a non-zero ``neg_label`` must raise a
    ValueError. Otherwise the binarized result and its inverse are compared
    with ``expected`` and ``y``.
    """
    for sparse_output in (True, False):
        binarize_kwargs = dict(classes=classes, neg_label=neg_label,
                               pos_label=pos_label,
                               sparse_output=sparse_output)

        if sparse_output and (pos_label == 0 or neg_label != 0):
            with pytest.raises(ValueError):
                label_binarize(y, **binarize_kwargs)
            continue

        # check label_binarize
        binarized = label_binarize(y, **binarize_kwargs).fetch()
        if hasattr(binarized, 'raw'):
            binarized = binarized.raw
        assert_array_equal(toarray(binarized), expected)
        assert sp.issparse(binarized) == sparse_output

        # check inverse
        y_type = type_of_target(y)
        if y_type == "multiclass":
            inversed = _inverse_binarize_multiclass(binarized,
                                                    classes=classes)
        else:
            midpoint = (neg_label + pos_label) / 2.
            inversed = _inverse_binarize_thresholding(
                binarized, output_type=y_type, classes=classes,
                threshold=midpoint)
        assert_array_equal(toarray(inversed), toarray(y))

        # Check label binarizer
        lb = LabelBinarizer(neg_label=neg_label, pos_label=pos_label,
                            sparse_output=sparse_output)
        binarized = lb.fit_transform(y)
        assert_array_equal(toarray(binarized), expected)
        assert binarized.issparse() == sparse_output

        inverse_output = lb.inverse_transform(binarized)
        assert_array_equal(toarray(inverse_output), toarray(y))
        assert inverse_output.issparse() == sp.issparse(y)
def _check_X_y(self, X, y, accept_sparse=True): is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2 if is_2d or type_of_target(y) != 'binary': raise TypeError("Only binary targets supported. For training " "multiclass or multilabel models, you may use the " "OneVsRest or OneVsAll metaestimators in " "scikit-learn.") X, Y = check_X_y(X, y, dtype=np.double, accept_sparse=accept_sparse, multi_output=False) self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1) y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double) return X, y
def fit(self, x, y):
    """Store the training data and precompute the cross-validation folds.

    Parameters
    ----------
    x: np.ndarray
    y: np.ndarray, 1D

    Sets ``self.x0`` and ``self.y0`` to the validated inputs and
    ``self.kf`` to the list of splits. An integer ``self.cv`` is turned
    into a ``StratifiedKFold`` for binary/multiclass targets and into a
    ``KFold`` otherwise.
    """
    x, y = check_X_y(x, y, "csc")
    self.x0 = x
    self.y0 = y

    cv = self.cv
    if isinstance(cv, numbers.Integral):
        if type_of_target(y) in ('binary', 'multiclass'):
            cv = StratifiedKFold(cv)
        else:
            cv = KFold(cv)

    # ``y`` has to be passed along: stratified splitters need the labels
    # to build their folds.
    self.kf = list(cv.split(x, y))
def _make_1st_stage_preds(X, y, X_test):
    """Cross-validate three first-stage models and collect their outputs.

    Regressors are used for a continuous target, classifiers otherwise.

    Returns
    -------
    (oof_predictions, test_predictions)
        Two lists holding one entry per model, in model order.
    """
    if type_of_target(y) == 'continuous':
        models = [
            SVR(),
            Ridge(random_state=0),
            RandomForestRegressor(n_estimators=30, random_state=0)
        ]
    else:
        models = [
            SVC(random_state=0),
            LogisticRegression(random_state=0),
            RandomForestClassifier(n_estimators=30, random_state=0)
        ]

    oof_predictions = []
    test_predictions = []
    for model in models:
        result = cross_validate(model, X, y, X_test, cv=5)
        oof_predictions.append(result.oof_prediction)
        test_predictions.append(result.test_prediction)
    return oof_predictions, test_predictions
def _predict_and_score(self, X_test, y_test): #XXX: Implement type_of_target(y) if(self.predict_proba): y_type = type_of_target(y_test) if(y_type in ('binary')): pred = self.model.predict_proba(X_test)[:,1] else: pred = self.model.predict_proba(X_test) else: pred = self.model.predict(X_test) if(self.multiclass_average == 'binary'): return self.metric(y_test, pred), pred else: return self.metric(y_test, pred, average=self.multiclass_average), pred
def dichotomize_vector(y, n_bins, ordered=False):
    """Bin the values of ``y`` with a max-entropy median dichotomizer.

    When ``y`` has no more distinct values than ``n_bins``, no splitter is
    fitted and the values are mapped with ``map_continuous_names`` instead.

    Parameters
    ----------
    y : array-like, squeezed before use.
    n_bins : int
        Number of bins handed to the splitter.
    ordered : bool
        Use the splitter's ``transform_ordered`` instead of ``transform``.
    """
    y = np.squeeze(y)
    if type_of_target(y) == 'multiclass':
        print('target could be multiclass!')

    splitter = MaxentropyMedianDichotomizationTransformer(n_bins)

    # Too few distinct values to split: just rename them.
    if n_bins >= np.unique(y).shape[0]:
        return np.array(map_continuous_names(y))

    column = y.reshape(-1, 1)
    splitter.fit(column)
    transform = splitter.transform_ordered if ordered else splitter.transform
    return np.squeeze(transform(column))
def _is_multilabel(self, y):
    """
    Return whether the given target array corresponds to a multilabel
    problem.

    Entries equal to -1 are replaced by 1 in a copy of ``y`` before the
    target type is inferred (presumably -1 marks unlabeled samples --
    confirm with callers). Raises a ValueError for any target type other
    than binary, multiclass or multilabel-indicator.
    """
    probe = y.copy()
    # OR-ing with an all-False array of the same shape yields a full-shape
    # boolean mask for the assignment below.
    minus_one = np.zeros_like(probe, dtype=bool) | (probe == -1)
    probe[minus_one] = 1

    kind = type_of_target(probe)
    if kind == 'multilabel-indicator':
        return True
    if kind in ['binary', 'multiclass']:
        return False

    # Raise an error, as in
    # sklearn.utils.multiclass.check_classification_targets.
    raise ValueError("Unknown label type: %r" % y)
def _check_data(self, obj_dml_data): if obj_dml_data.z_cols is not None: raise ValueError( 'Incompatible data. ' + ' and '.join(obj_dml_data.z_cols) + ' have been set as instrumental variable(s). ' 'To fit an interactive IV regression model use DoubleMLIIVM instead of DoubleMLIRM.' ) one_treat = (obj_dml_data.n_treat == 1) binary_treat = (type_of_target(obj_dml_data.d) == 'binary') zero_one_treat = np.all((np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0) if not (one_treat & binary_treat & zero_one_treat): raise ValueError('Incompatible data. ' 'To fit an IRM model with DML ' 'exactly one binary variable with values 0 and 1 ' 'needs to be specified as treatment variable.') return
def test_multiclass_conversion(self):
    """
    Makes sure that an encoded target for classification
    properly retains the multiclass target type
    """
    values = [1.0, 2.0, 2.0, 4.0, 3]
    # Multiclass conversion for different datatypes
    candidates = (
        list(values),
        np.array(values, dtype=np.float64),
        pd.DataFrame(values, dtype='category'),
    )

    for candidate in candidates:
        validator = InputValidator()
        y_train = validator.validate_target(
            candidate,
            is_classification=True,
        )
        self.assertEqual('multiclass', type_of_target(y_train))
def test_multilabel_conversion(self):
    """
    Makes sure that an encoded target for classification
    properly retains the multilabel target type
    """
    # Multi-label conversion for different datatypes
    candidates = (
        [[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]],
        np.array([[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]]),
        pd.DataFrame([[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]],
                     dtype='category'),
    )

    for candidate in candidates:
        validator = InputValidator()
        y_train = validator.validate_target(
            candidate,
            is_classification=True,
        )
        self.assertEqual('multilabel-indicator', type_of_target(y_train))
def _validate_targets(self, y):
    """ Validates labels for training and testing classifier

    The distinct labels are stored in ``self.classes_`` and the labels are
    returned re-encoded as indices into that array.

    Raises
    ------
    ValueError
        If the labels are not binary.
    """
    y_ = column_or_1d(y, warn=True)
    check_classification_targets(y)
    self.classes_, y = np.unique(y_, return_inverse=True)

    # Make sure that labels are binary. This used to print the message and
    # implicitly return None, which only failed later and less clearly.
    if type_of_target(y) != 'binary':
        raise ValueError("Labels must be binary. That is, +1 or -1")
    return y
def validate(self, test_data, test_target):
    """Refit the best model and score it on held-out data.

    A model built from ``self.best_params`` is fitted on ``self.data`` /
    ``self.target`` and evaluated on the given test set. Which scores are
    computed depends on whether ``str(self.primitive_class)`` contains
    ``'classification'`` or ``'regression'``.

    Returns
    -------
    dict
        The computed scores, including the model's own ``score``.

    Raises
    ------
    ValueError
        If the primitive is neither a classification nor a regression one,
        or if a classification target is neither binary nor multiclass.
    """
    best_model = self.sklearn_class(**self.best_params)
    best_model.fit(self.data, self.target)
    prediction = best_model.predict(test_data)
    score = best_model.score(test_data, test_target)

    primitive = str(self.primitive_class)
    if 'classification' in primitive:
        type_target = type_of_target(test_target)
        if type_target == "binary":
            # value_counts() orders labels by decreasing frequency, so
            # index[1] is the less frequent of the two labels.
            series_target = pd.Series(test_target)
            positive_label = series_target.value_counts().index[1]
            scores_dict = self._classification_scoring(
                test_target, prediction, average_type='binary',
                positive_label=positive_label)
            scores_dict['roc_auc'] = roc_auc_score(test_target, prediction)
            scores_dict['score'] = score
        elif type_target == "multiclass":
            scores_dict = self._classification_scoring(
                test_target, prediction, average_type='macro')
            scores_dict['score'] = score
        else:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                "Unsupported target type for classification scoring: %r"
                % (type_target,))
    elif 'regression' in primitive:
        scores_dict = {
            'optimization_technique': 'hb',
            'estimator': primitive,
            'dataset': self.dataset_name,
            'r2': r2_score(test_target, prediction),
            'explained_variance_score': explained_variance_score(
                test_target, prediction),
            'mean_squared_error': mean_squared_error(test_target, prediction),
            'max_evals': self.MAX_EVALS,
            'total_time': self.run_time,
            'best_params': self.best_params,
            'score': score
        }
    else:
        # Previously this fell through to an UnboundLocalError.
        raise ValueError(
            "Cannot score primitive %r: expected a classification or "
            "regression primitive" % (primitive,))

    return scores_dict
def test_continuous_multioutput_conversion(self):
    """
    Makes sure that an input for regression
    properly retains the multi-output continuous target type
    """
    # Regression multi out conversion for different datatypes
    candidates = (
        [[31.4, 94], [40.5, 109], [25.0, 30]],
        np.array([[31.4, 94], [40.5, 109], [25.0, 30]]),
        pd.DataFrame([[31.4, 94], [40.5, 109], [25.0, 30]]),
    )

    for candidate in candidates:
        validator = InputValidator()
        y_train = validator.validate_target(
            candidate,
            is_classification=False,
        )
        self.assertEqual('continuous-multioutput', type_of_target(y_train))
def check_holdout(holdout, X, y, classifier=True):
    """Turn an integer ``holdout`` into a shuffle-split object.

    ``None`` is replaced by 0.8. Only integer values are converted into a
    splitter; any other value (including the 0.8 default) is returned
    unchanged.

    For classifiers a ``StratifiedShuffleSplit`` is built for binary or
    multiclass targets and a ``ShuffleSplit`` otherwise. For
    non-classifiers a ``ShuffleSplit`` over the samples of ``X`` is built.
    """
    is_sparse = sp.issparse(X)

    if holdout is None:
        holdout = 0.8

    if not isinstance(holdout, numbers.Integral):
        return holdout

    if classifier:
        if type_of_target(y) in ['binary', 'multiclass']:
            return StratifiedShuffleSplit(y, train_size=holdout)
        return ShuffleSplit(_num_samples(y), train_size=holdout)

    # Sparse matrices do not support len(); use their shape instead.
    n_samples = X.shape[0] if is_sparse else len(X)
    return ShuffleSplit(n_samples, train_size=holdout)
def get_ml_task_from_y(y):
    """Map the inferred target type of ``y`` to an autoflow task constant.

    Raises
    ------
    NotImplementedError
        For every target type other than binary, multiclass,
        multilabel-indicator and continuous.
    """
    from autoflow.constants import binary_classification_task, multiclass_classification_task, \
        multilabel_classification_task, regression_task

    task_by_kind = {
        "binary": binary_classification_task,
        "multiclass": multiclass_classification_task,
        "multilabel-indicator": multilabel_classification_task,
        "continuous": regression_task,
    }

    y_type = type_of_target(y)
    # "multiclass-multioutput" and any other kind are not supported.
    if y_type not in task_by_kind:
        raise NotImplementedError()
    return task_by_kind[y_type]
def compute_metrics(self, targets, predictions, scores=None, target_field=None, prediction_field=None, score_field=None):
    """
    Compute and track metrics for confusion_matrix

    Parameters
    ----------
    targets : List
        targets (or actuals) for validation
    predictions : List
        predictions (or inferred values)
    scores : List, optional
        associated scores for each prediction; every score defaults to 1
        when omitted
    target_field : str, optional
    prediction_field : str, optional
    score_field : str, optional

    Raises
    ------
    NotImplementedError
        If the targets are neither binary nor multiclass.
    """
    if type_of_target(targets) not in ("binary", "multiclass"):
        raise NotImplementedError("target type not supported yet")

    # Without explicit scores, every prediction gets a score of 1.
    if scores is None:
        scores = np.ones(len(targets))
    score_array = np.array(scores)

    # compute confusion_matrix
    self.metrics.compute_confusion_matrix(
        predictions=predictions,
        targets=targets,
        scores=score_array,
        target_field=target_field,
        prediction_field=prediction_field,
        score_field=score_field)
def check_binarized_results(y, classes, pos_label, neg_label, expected):
    """Check ``label_binarize`` / ``LabelBinarizer`` against ``expected``.

    Both dense and sparse outputs are exercised. Sparse output combined
    with ``pos_label == 0`` or a non-zero ``neg_label`` must raise a
    ValueError. Otherwise the binarized result and its inverse are compared
    with ``expected`` and ``y``.
    """
    for sparse_output in [True, False]:
        # ``classes`` is always passed by keyword: it is keyword-only in
        # current scikit-learn releases.
        if ((pos_label == 0 or neg_label != 0) and sparse_output):
            assert_raises(ValueError, label_binarize, y, classes=classes,
                          neg_label=neg_label, pos_label=pos_label,
                          sparse_output=sparse_output)
            continue

        # check label_binarize
        binarized = label_binarize(y, classes=classes, neg_label=neg_label,
                                   pos_label=pos_label,
                                   sparse_output=sparse_output)
        assert_array_equal(toarray(binarized), expected)
        assert_equal(issparse(binarized), sparse_output)

        # check inverse
        y_type = type_of_target(y)
        if y_type == "multiclass":
            inversed = _inverse_binarize_multiclass(binarized,
                                                    classes=classes)
        else:
            inversed = _inverse_binarize_thresholding(
                binarized, output_type=y_type, classes=classes,
                threshold=((neg_label + pos_label) / 2.))
        assert_array_equal(toarray(inversed), toarray(y))

        # Check label binarizer
        lb = LabelBinarizer(neg_label=neg_label, pos_label=pos_label,
                            sparse_output=sparse_output)
        binarized = lb.fit_transform(y)
        assert_array_equal(toarray(binarized), expected)
        assert_equal(issparse(binarized), sparse_output)

        inverse_output = lb.inverse_transform(binarized)
        assert_array_equal(toarray(inverse_output), toarray(y))
        assert_equal(issparse(inverse_output), issparse(y))
def fit(self, data, **kwargs):
    """
    Fit the regressor to given training data.

    The metric defaults to ``'mse'`` when none is set. The task type is
    looked up in ``type_dict`` from the target type of ``data.data[1]``.
    Extra keyword arguments are accepted but not forwarded.

    :param data: DataNode
    :return: self
    :raises ValueError: if the detected target type is not in ``type_dict``
    """
    if self.metric is None:
        self.metric = 'mse'

    # Check the task type: {continuous}
    detected = type_of_target(data.data[1])
    if detected not in type_dict:
        raise ValueError("Invalid Task Type: %s!" % detected)
    self.task_type = type_dict[detected]

    super().fit(data)
    return self
def fit(self, data: DataNode):
    """
    Fit the classifier to given training data.

    The metric defaults to ``'acc'`` when none is set. The task type is
    looked up in ``type_dict`` from the target type of ``data.data[1]``.

    :param data: instance of DataNode
    :return: self
    :raises ValueError: if the detected target type is not in ``type_dict``
    """
    if self.metric is None:
        self.metric = 'acc'

    # Check the task type: {binary, multiclass}
    detected = type_of_target(data.data[1])
    if detected not in type_dict:
        raise ValueError("Invalid Task Type: %s!" % detected)
    self.task_type = type_dict[detected]

    super().fit(data)
    return self
def fit(self, X, y=None):
    """
    Fit the classification model.

    The target type of ``y`` is recorded in ``self.target_type_`` as either
    ``MULTICLASS`` or ``BINARY`` (matched by prefix, multiclass first). A
    YellowbrickValueError is raised for every other target type.
    """
    # The target determines what kind of estimator is fit
    ttype = type_of_target(y)

    for family in (MULTICLASS, BINARY):
        if ttype.startswith(family):
            self.target_type_ = family
            break
    else:
        raise YellowbrickValueError(
            ("{} does not support target type '{}', "
             "please provide a binary or multiclass single-output target"
             ).format(self.__class__.__name__, ttype))

    # Fit the model and return self
    return super(ROCAUC, self).fit(X, y)
def __call__(self, clf, X, y_true, sample_weight=None, lamb=None):
    """Evaluate decision function output for X relative to y_true.

    Parameters
    ----------
    clf : object
        Trained classifier to use for scoring. Its
        ``decision_function(X, lamb=lamb)`` output is what gets scored.

    X : array-like or sparse matrix
        Test data that will be fed to clf.decision_function.

    y_true : array-like
        Gold standard target values for X. These must be class labels,
        not decision function values.

    sample_weight : array-like, optional (default=None)
        Sample weights.

    lamb : array, shape (n_lambda,)
        Values of lambda from lambda_path_ from which to score predictions.

    Returns
    -------
    score : array, shape (n_lambda,)
        Score function applied to prediction of estimator on X.

    Raises
    ------
    ValueError
        If ``y_true`` is neither binary nor a multilabel indicator.
    """
    y_type = type_of_target(y_true)
    if y_type not in ("binary", "multilabel-indicator"):
        raise ValueError("{0} format is not supported".format(y_type))

    y_pred = clf.decision_function(X, lamb=lamb)

    # ``sample_weight`` is forwarded only when it was actually given.
    if sample_weight is not None:
        def score_column(y_hat):
            return self._score_func(y_true, y_hat,
                                    sample_weight=sample_weight,
                                    **self._kwargs)
    else:
        def score_column(y_hat):
            return self._score_func(y_true, y_hat, **self._kwargs)

    # Score each slice of the decision values taken along axis 0.
    scores = np.apply_along_axis(score_column, 0, y_pred)
    return self._sign * scores
def partial_fit(self, X, y=None, forget=False, update_classes=False, compute_output_weights=True) -> ELMClassifier:
    """Update classifier with a new batch of data.

    |method_partial_fit|

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape=[n_samples, n_features]
        Training input samples

    y : array-like, shape=[n_samples, n_targets]
        Training targets

    forget : boolean, default False
        |param_forget|

    update_classes : boolean, default False
        Include new classes from `y` into the model, assuming they were 0 in all previous samples.

    compute_output_weights : boolean, optional, default True
        |param_compute_output_weights|

    Returns
    -------
    self
    """
    # todo: Warning on strongly non-normalized data

    X, y = check_X_y(X, y, accept_sparse=True, multi_output=True)

    # The label binarizer is created and fitted only on the first call.
    if not hasattr(self, 'label_binarizer_'):
        self.label_binarizer_ = LabelBinarizer()
        if type_of_target(y).endswith("-multioutput"):
            self.label_binarizer_ = MultiLabelBinarizer()
        known_classes = y if self.classes is None else self.classes
        self.label_binarizer_.fit(known_classes)

    if update_classes:
        self._update_classes(y)

    encoded = self.label_binarizer_.transform(y)
    # A single output column is flattened to a 1-D target vector.
    if len(encoded.shape) > 1 and encoded.shape[1] == 1:
        encoded = encoded[:, 0]

    super().partial_fit(
        X,
        encoded,
        forget=forget,
        compute_output_weights=compute_output_weights,
    )
    return self
def __call__(self, y_true, y_pred, sample_weight=None):
    """Evaluate predicted target values for X relative to y_true.

    Predictions are converted to the form the score function expects,
    based on the target type of ``y_true``: class probabilities are
    reduced with ``argmax`` for binary/multiclass targets and thresholded
    at 0.5 for multilabel targets. The caller's ``y_pred`` is never
    modified.

    Parameters
    ----------
    y_true : array-like
        Gold standard target values for X.

    y_pred : array-like, [n_samples x n_classes]
        Model predictions

    sample_weight : array-like, optional (default=None)
        Sample weights.

    Returns
    -------
    score : float
        Score function applied to prediction of estimator on X.

    Raises
    ------
    ValueError
        If the target type of ``y_true`` is not handled by any branch.
    """
    if isinstance(y_true, list):
        y_true = np.array(y_true)
    if isinstance(y_pred, list):
        y_pred = np.array(y_pred)
    type_true = type_of_target(y_true)

    if len(y_pred.shape) == 1 or y_pred.shape[1] == 1 or \
            type_true == 'continuous':
        # must be regression, all other task types would return at least
        # two probabilities
        pass
    elif type_true in ['binary', 'multiclass']:
        y_pred = np.argmax(y_pred, axis=1)
    elif type_true == 'multilabel-indicator':
        # Threshold a private copy: writing into the argument would change
        # the caller's array and leak into any later use of the same
        # predictions (e.g. a second scorer).
        y_pred = y_pred.copy()
        y_pred[y_pred > 0.5] = 1.0
        y_pred[y_pred <= 0.5] = 0.0
    else:
        raise ValueError(type_true)

    if sample_weight is not None:
        return self._sign * self._score_func(
            y_true, y_pred, sample_weight=sample_weight, **self._kwargs)
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
def is_cat(s: Union[pd.Series, np.ndarray], consider_ordinal_as_cat):
    """Tell whether a column should be treated as categorical.

    An object-dtype column is categorical as soon as it holds an element
    that is not a ``float`` or ``int``; otherwise it is cast to float32.
    When ``consider_ordinal_as_cat`` is truthy, the column (NaNs dropped)
    is also categorical if its target type is ``"multiclass"``, or
    ``"binary"`` when ``consider_ordinal_as_cat`` is ``2`` or ``"binary"``.

    Returns ``True`` for categorical, ``False`` otherwise.
    """
    series = s if isinstance(s, pd.Series) else pd.Series(s)

    if series.dtype == object:
        if any(not isinstance(item, (float, int)) for item in series):
            return True
        series = series.astype('float32')

    if not consider_ordinal_as_cat:
        return False

    accepted = ["multiclass"]
    if consider_ordinal_as_cat in (2, "binary"):
        accepted.append("binary")
    return type_of_target(series.dropna()) in accepted
def feature_discretion(self, X, y):
    """Discretize every column of ``X`` with the configured strategy.

    The strategy is selected by ``self._DISCRETION``:

    * ``"percentile_discrete"`` -> ``self.percentile_discrete(x, self._WOE_N)``
    * ``"interval_discrete"``   -> ``self.interval_discrete(x, self._WOE_N)``
    * ``"rf_discrete"``         -> ``self.rf_discrete(x, y)``

    Any other value leaves both results empty.

    :param X: 2-D numpy array, one feature per column
    :param y: targets, only used by the ``"rf_discrete"`` strategy
    :return: tuple ``(discretized, intervals)`` where ``discretized`` is the
        array of discretized columns (same column order as ``X``) and
        ``intervals`` is the list of per-column interval descriptions
        returned by the strategy
    """
    mode = self._DISCRETION

    if mode == "percentile_discrete":
        def discretize(column):
            # NOTE(review): the detected type used to feed a
            # "continuous columns only" branch that was disabled with
            # `if 0:`; every column is discretized regardless. The call is
            # kept only so that any error it raises on bad input still
            # surfaces - confirm whether it can be dropped.
            type_of_target(column)
            return self.percentile_discrete(column, self._WOE_N)
    elif mode == "interval_discrete":
        def discretize(column):
            return self.interval_discrete(column, self._WOE_N)
    elif mode == "rf_discrete":
        def discretize(column):
            return self.rf_discrete(column, y)
    else:
        # Unknown strategy: nothing is discretized.
        return np.array([]).T, []

    temp, X_interval = [], []
    for i in range(X.shape[-1]):
        x1, interval = discretize(X[:, i])
        X_interval.append(interval)
        temp.append(x1)
    return np.array(temp).T, X_interval
def train_nb(X, y):
    """Estimate naive Bayes parameters from a labelled data frame.

    Labels are the strings '是' (positive) and '否' (negative). For each
    column of ``X`` a namedtuple (named after the column) with fields
    ``is_continuous`` and ``conditional_pro`` is produced per class:

    * continuous column: ``conditional_pro`` is ``[mean, variance]`` of the
      column within the class;
    * discrete column: ``conditional_pro`` is the log of the
      Laplace-smoothed conditional probability of every value the column
      takes in ``X``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature columns.
    y : array-like aligned with the rows of ``X``
        Class labels, '是' or '否'.

    Returns
    -------
    p1 : float
        Laplace-smoothed prior probability of the positive class.
    p1_list, p0_list : list of namedtuple
        Per-column conditional statistics for the positive and negative
        class respectively.
    """
    m, n = X.shape
    p1 = (len(y[y == '是']) + 1) / (m + 2)  # Laplace smoothing

    p1_list = []  # conditional statistics of each attribute, positive class
    p0_list = []  # same for the negative class

    X1 = X[y == '是']
    X0 = X[y == '否']

    m1, _ = X1.shape
    m0, _ = X0.shape

    for i in range(n):
        xi = X.iloc[:, i]
        # Record type holding the statistics of this attribute.
        p_xi = namedtuple(X.columns[i], ['is_continuous', 'conditional_pro'])

        is_continuous = type_of_target(xi) == 'continuous'
        xi1 = X1.iloc[:, i]
        xi0 = X0.iloc[:, i]
        if is_continuous:
            # Continuous attribute: store [mean, var] per class.
            xi1_mean = np.mean(xi1)
            xi1_var = np.var(xi1)
            xi0_mean = np.mean(xi0)
            xi0_var = np.var(xi0)

            p1_list.append(p_xi(is_continuous, [xi1_mean, xi1_var]))
            p0_list.append(p_xi(is_continuous, [xi0_mean, xi0_var]))
        else:
            # Discrete attribute: smoothed conditional probability of each
            # value.
            unique_value = xi.unique()  # values taken by the attribute
            nvalue = len(unique_value)  # number of distinct values

            # Count each value within the class, +1 for Laplace smoothing.
            # reindex() yields 0 for values absent from the class; plain
            # label indexing with a missing label raises KeyError on
            # current pandas.
            xi1_value_count = xi1.value_counts().reindex(
                unique_value, fill_value=0) + 1
            xi0_value_count = xi0.value_counts().reindex(
                unique_value, fill_value=0) + 1

            p1_list.append(
                p_xi(is_continuous, np.log(xi1_value_count / (m1 + nvalue))))
            p0_list.append(
                p_xi(is_continuous, np.log(xi0_value_count / (m0 + nvalue))))

    return p1, p1_list, p0_list
def check_binarized_results(y, classes, pos_label, neg_label, expected):
    """Check ``label_binarize`` / ``LabelBinarizer`` against ``expected``.

    Both sparse and dense outputs are exercised. For each one the helper
    checks the binarized values, the sparsity of the result and that the
    inverse transformation gives back ``y``. Sparse output combined with
    ``pos_label == 0`` or ``neg_label != 0`` must raise ``ValueError``.
    """
    for sparse_output in (True, False):
        label_options = dict(neg_label=neg_label, pos_label=pos_label,
                             sparse_output=sparse_output)

        if (pos_label == 0 or neg_label != 0) and sparse_output:
            assert_raises(ValueError, label_binarize, y, classes,
                          **label_options)
            continue

        # check label_binarize
        encoded = label_binarize(y, classes, **label_options)
        assert_array_equal(toarray(encoded), expected)
        assert_equal(issparse(encoded), sparse_output)

        # check inverse
        y_type = type_of_target(y)
        if y_type == "multiclass":
            decoded = _inverse_binarize_multiclass(encoded, classes=classes)
        else:
            decoded = _inverse_binarize_thresholding(
                encoded,
                output_type=y_type,
                classes=classes,
                threshold=((neg_label + pos_label) / 2.))
        assert_array_equal(toarray(decoded), toarray(y))

        # Check label binarizer
        binarizer = LabelBinarizer(**label_options)
        encoded = binarizer.fit_transform(y)
        assert_array_equal(toarray(encoded), expected)
        assert_equal(issparse(encoded), sparse_output)

        round_trip = binarizer.inverse_transform(encoded)
        assert_array_equal(toarray(round_trip), toarray(y))
        assert_equal(issparse(round_trip), issparse(y))
def __call__(self, clf, X, y_true, sample_weight=None, lamb=None):
    """Score the decision function of ``clf`` on ``X`` against ``y_true``.

    Parameters
    ----------
    clf : object
        Trained classifier to use for scoring. Only its
        ``decision_function(X, lamb=lamb)`` method is called here.

    X : array-like or sparse matrix
        Test data that will be fed to ``clf.decision_function``.

    y_true : array-like
        Gold standard target values for X. These must be class labels,
        not decision function values.

    sample_weight : array-like, optional (default=None)
        Sample weights, forwarded to the score function when given.

    lamb : array, shape (n_lambda,)
        Values of lambda from lambda_path_ from which to score predictions.

    Returns
    -------
    score : array, shape (n_lambda,)
        Score function applied to prediction of estimator on X, multiplied
        by the scorer's sign.

    Raises
    ------
    ValueError
        If the target type of ``y_true`` is neither ``"binary"`` nor
        ``"multilabel-indicator"``.
    """
    y_type = type_of_target(y_true)
    if y_type not in ("binary", "multilabel-indicator"):
        raise ValueError("{0} format is not supported".format(y_type))

    decision = clf.decision_function(X, lamb=lamb)

    # Pick the per-column scorer once; sample_weight is only forwarded
    # when the caller supplied it.
    if sample_weight is None:
        def score_column(column):
            return self._score_func(y_true, column, **self._kwargs)
    else:
        def score_column(column):
            return self._score_func(y_true, column,
                                    sample_weight=sample_weight,
                                    **self._kwargs)

    # One score per slice taken along axis 0 of the decision values.
    return self._sign * np.apply_along_axis(score_column, 0, decision)
def _sampling_strategy_float(sampling_strategy, y, sampling_type):
    """Take a proportion of the majority (over-sampling) or minority
    (under-sampling) class in binary classification.

    Returns a dict mapping each class other than the reference class to a
    sample count: the number of samples to generate for over-sampling, or
    the number of samples to keep for under-sampling.

    Raises ``ValueError`` when the target is not binary, when the ratio is
    inconsistent with the requested sampling type, or when
    ``sampling_type`` is neither ``'over-sampling'`` nor
    ``'under-sampling'``.
    """
    if type_of_target(y) != 'binary':
        raise ValueError(
            '"sampling_strategy" can be a float only when the type '
            'of target is binary. For multi-class, use a dict.')

    target_stats = _count_class_sample(y)

    if sampling_type == 'over-sampling':
        n_sample_majority = max(target_stats.values())
        class_majority = max(target_stats, key=target_stats.get)
        sampling_strategy_ = {}
        for label, count in target_stats.items():
            if label == class_majority:
                continue
            sampling_strategy_[label] = int(
                n_sample_majority * sampling_strategy - count)
        if any(n_new <= 0 for n_new in sampling_strategy_.values()):
            raise ValueError("The specified ratio required to remove samples "
                             "from the minority class while trying to "
                             "generate new samples. Please increase the "
                             "ratio.")
        return sampling_strategy_

    if sampling_type == 'under-sampling':
        n_sample_minority = min(target_stats.values())
        class_minority = min(target_stats, key=target_stats.get)
        sampling_strategy_ = {}
        for label in target_stats:
            if label == class_minority:
                continue
            sampling_strategy_[label] = int(
                n_sample_minority / sampling_strategy)
        if any(n_kept > target_stats[label]
               for label, n_kept in sampling_strategy_.items()):
            raise ValueError("The specified ratio required to generate new "
                             "sample in the majority class while trying to "
                             "remove samples. Please increase the ratio.")
        return sampling_strategy_

    # NOTE(review): the message reads "do let"; "do not let" is presumably
    # what was meant. Left untouched because callers may match on it.
    raise ValueError("'clean-sampling' methods do let the user "
                     "specify the sampling ratio.")
def analyse_results(
        regular_cv_results, permutation_cv_results, labels, estimator,
        base_folder=None, analysis_folder='analysis', feature_names=None,
        learning_task=None, vs_analysis=None,
        threshold=.75, model_assessment_options=None,
        score_surfaces_options=None):
    """Summary and plot generation.

    Computes performance metrics for the regular and the permutation
    batches, optionally summarises variable selection frequencies,
    produces the plots and, when an output folder is configured, pickles
    the collected summary to ``summary.pkl`` inside it.

    Parameters
    ----------
    regular_cv_results, permutation_cv_results : dict
        Cross-validation results of the two batches. The ``'estimator'``
        entry, when present, is the list of fitted estimators.
    labels : array-like
        Target values.
    estimator : object
        The estimator that was evaluated. Score surfaces are drawn only if
        it is a ``BaseSearchCV`` instance.
    base_folder, analysis_folder : str or None
        Output goes to ``os.path.join(base_folder, analysis_folder)``. If
        either is ``None`` nothing is written by this function itself.
    feature_names : array-like or None
        Required when ``vs_analysis`` is given.
    learning_task : str or None
        Follows the convention of
        ``sklearn.utils.multiclass.type_of_target``; inferred when ``None``.
    vs_analysis : object or None
        Forwarded to ``get_selected_list``; enables the variable selection
        analysis when not ``None``.
    threshold : float or None
        Selection frequency threshold; ``None`` means ``.75``.
    model_assessment_options : dict or None
        Defaults to an empty dict.
    score_surfaces_options : dict or None
        Extra keyword arguments for ``plotting.score_surfaces``.

    Raises
    ------
    ValueError
        If ``vs_analysis`` is given without ``feature_names``.
    """
    # learning_task follows the convention of
    # sklearn.utils.multiclass.type_of_target
    if learning_task is None:
        if is_regressor(estimator):
            learning_task = 'continuous'
        else:
            learning_task = type_of_target(labels)

    # Create an empty dictionary which will contain the key results
    # of the analysis
    analysis_summary = dict()

    # Run the appropriate analysis according to the learning_task
    is_regression = learning_task.lower() in ('continuous', 'regression')
    if is_regression:
        # Perform regression analysis
        target = 'regression'
    elif learning_task.lower() == 'multiclass':
        target = 'multiclass'
    else:
        # Perform classification analysis
        target = 'classification'

    # Support for empty regular or permutation tests
    performance_regular = performance_metrics(
        regular_cv_results, labels, target)
    performance_permutation = performance_metrics(
        permutation_cv_results, labels, target)

    if base_folder is not None and analysis_folder is not None:
        analysis_folder = os.path.join(base_folder, analysis_folder)
        if not os.path.exists(analysis_folder):
            os.makedirs(analysis_folder)

        # Create two separate folders for figures in different formats
        try:
            os.mkdir(os.path.join(analysis_folder, 'figures_pdf'))
            os.mkdir(os.path.join(analysis_folder, 'figures_png'))
        except OSError:
            pass  # if folder already exists, ignore it
    else:
        analysis_folder = None

    if model_assessment_options is None:
        model_assessment_options = {}

    # Handle variable selection step
    if vs_analysis is not None:
        # Get feature names
        if feature_names is None:
            # what follows creates [feat_0, feat_1, ..., feat_d]
            # feature_names = 'feat_' + np.arange(
            #     labels.size).astype(str).astype(object)
            raise ValueError(
                "Variable selection analysis was specified, but no feature "
                "names were provided.")
        feature_names = np.array(feature_names)  # force feature names to array

        if threshold is None:
            threshold = .75

        selected = {}
        # Init variable selection containers
        selected['regular'] = dict(
            zip(feature_names, np.zeros(len(feature_names))))
        selected['permutation'] = selected['regular'].copy()

        # Number of splits = length of the first entry of the results, or 0
        # for empty results. dict views cannot be indexed on Python 3, so
        # take the first value through an iterator.
        n_splits_regular = len(next(iter(regular_cv_results.values()), []))
        n_splits_permutation = len(
            next(iter(permutation_cv_results.values()), []))
        n_jobs = {'regular': n_splits_regular,
                  'permutation': n_splits_permutation}
        names_ = ('regular', 'permutation')
        cv_results_ = (regular_cv_results, permutation_cv_results)
        for batch_name, cv_result in zip(names_, cv_results_):
            # cv_result['estimator'] is a list containing
            # the grid-search estimators
            estimators = cv_result.get('estimator', None)
            if estimators is None:
                continue  # in case of no permutations skip this iter

            # A dedicated loop name keeps the `estimator` argument intact
            # for the BaseSearchCV check further down.
            for fitted_estimator in estimators:
                selected_list = get_selected_list(
                    fitted_estimator, vs_analysis)
                if len(selected_list) < 1:
                    continue
                selected_variables = feature_names[selected_list]

                for var in selected_variables:
                    selected[batch_name][var] += 1. / n_jobs[batch_name]

            # Save selected variables textual summary
            if analysis_folder is not None:
                save_signature(os.path.join(
                    analysis_folder, 'signature_%s.txt' % batch_name),
                    selected[batch_name], threshold)

            # Also save the frequency list as an entry of the analysis summary
            # Create an empty pandas dataframe to store the frequencies
            df_tmp = pd.DataFrame(columns=['Frequency'])
            for k in reversed(sorted(
                    selected[batch_name],
                    key=selected[batch_name].__getitem__)):
                df_tmp.loc[k] = selected[batch_name][k] * 100

            # Add the dataframe to the analysis summary
            analysis_summary[
                'selection_frequency_{}'.format(batch_name)] = df_tmp

        feat_arr_r = np.array(list(iteritems(selected['regular'])),
                              dtype=object)
        feat_arr_p = np.array(list(iteritems(selected['permutation'])),
                              dtype=object)

        # sort by name
        feat_arr_r = feat_arr_r[feat_arr_r[:, 0].argsort()]
        feat_arr_p = feat_arr_p[feat_arr_p[:, 0].argsort()]

        # Save graphical summary
        plotting.feature_frequencies(
            feat_arr_r, analysis_folder, threshold=threshold)

        plotting.features_manhattan(
            feat_arr_r, feat_arr_p, analysis_folder, threshold=threshold)

        plotting.select_over_threshold(
            feat_arr_r, feat_arr_p, analysis_folder, threshold=threshold)

    # Generate distribution plots
    # And save distributions in analysis summary
    for i, metric in enumerate(performance_regular):
        plotting.distributions(
            v_regular=performance_regular[metric],
            v_permutation=performance_permutation.get(metric, []),
            base_folder=analysis_folder,
            metric=metric,
            first_run=i == 0,
            is_regression=is_regression)

        v_regular = performance_regular[metric]
        v_permutation = performance_permutation.get(metric, [])

        metric_values = dict()
        metric_values['values_regular'] = v_regular
        metric_values['values_permutation'] = v_permutation

        r_mean, r_sd = np.nanmean(v_regular), np.nanstd(v_regular)
        p_mean, p_sd = np.nanmean(v_permutation), np.nanstd(v_permutation)
        rstest = stats.ks_2samp(v_regular, v_permutation)

        metric_values['mean_regular'] = r_mean
        metric_values['sd_regular'] = r_sd
        metric_values['mean_permutation'] = p_mean
        metric_values['sd_permutation'] = p_sd
        metric_values['rstest'] = rstest

        analysis_summary['metric_{}'.format(metric)] = metric_values

    # Generate surfaces
    # This has meaning only if the estimator is an instance of GridSearchCV
    if isinstance(estimator, BaseSearchCV):
        if score_surfaces_options is None:
            score_surfaces_options = {}
        plotting.score_surfaces(
            param_grid=estimator.param_grid,
            results=regular_cv_results,
            base_folder=analysis_folder,
            is_regression=is_regression,
            **score_surfaces_options)

    # Finally, save in the analysis folder the pickled summary.
    # Pickle emits bytes, so the file must be opened in binary mode.
    if analysis_folder is not None:
        with open(os.path.join(analysis_folder, 'summary.pkl'), 'wb') as af:
            pkl.dump(analysis_summary, af)
def fit(self, X, y, **kwargs):
    """
    Fit is the entry point for the visualizer. Given instances described
    by X and binary classes described in the target y, fit performs n
    trials by shuffling and splitting the dataset then computing the
    precision, recall, f1, and queue rate scores for each trial. The
    scores are aggregated by the quantiles expressed then drawn.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values. The target y must
        be a binary classification target.

    kwargs: dict
        keyword arguments passed to Scikit-Learn API.

    Returns
    -------
    self : instance
        Returns the instance of the visualizer

    raises: YellowbrickValueError
        If the target y is not a binary classification target.
    """
    # Check target before metrics raise crazy exceptions
    if type_of_target(y) != 'binary':
        raise YellowbrickValueError("multiclass format is not supported")

    # Make arrays indexable for cross validation
    X, y = indexable(X, y)

    # TODO: parallelize trials with joblib (using sklearn utility)
    # NOTE: parallelization with matplotlib is tricky at best!
    trials = []
    for trial_index in range(self.n_trials):
        trials.extend(self._split_fit_score_trial(X, y, trial_index))

    # Shortest threshold list across all trials sets the size of the
    # uniform threshold grid.
    n_thresholds = np.array([len(t['thresholds']) for t in trials]).min()
    self.thresholds_ = np.linspace(0.0, 1.0, num=n_thresholds)

    # Filter metrics and collect values for uniform thresholds
    metrics = frozenset(METRICS) - self._check_exclude(self.exclude)
    collected = defaultdict(list)

    for trial in trials:
        per_metric = defaultdict(list)
        for threshold in self.thresholds_:
            # Position of the uniform threshold inside the trial's own
            # thresholds.
            position = bisect.bisect_left(trial['thresholds'], threshold)
            for name in metrics:
                per_metric[name].append(trial[name][position])

        for name, row in per_metric.items():
            collected[name].append(row)

    # Convert metrics to metric arrays
    uniform_metrics = {
        name: np.array(rows) for name, rows in collected.items()
    }

    # Perform aggregation and store cv_scores_
    quantiles = self._check_quantiles(self.quantiles)
    self.cv_scores_ = {}

    for name, values in uniform_metrics.items():
        # Compute the lower, median, and upper plots
        lower, median, upper = mstats.mquantiles(
            values, prob=quantiles, axis=0
        )

        # Store the aggregates in cv scores
        self.cv_scores_[name] = median
        self.cv_scores_["{}_lower".format(name)] = lower
        self.cv_scores_["{}_upper".format(name)] = upper

    # Draw and always return self
    self.draw()
    return self
def test_check_classification_targets():
    """check_classification_targets names the detected target type. #5782

    The ValueError raised for a non-classification target must contain
    the value returned by ``type_of_target`` for that same target.
    """
    target = np.array([0.0, 1.1, 2.0, 3.0])
    expected_fragment = type_of_target(target)
    assert_raise_message(
        ValueError,
        expected_fragment,
        check_classification_targets,
        target,
    )