def plot_all_roc_curve(self, splitter=None, num_splits=None):
    """Plot ROC curves for all estimators."""
    (clf_list, clf_names) = self.get_component_classifiers()
    colors = ['black', 'orange', 'blue', 'green', 'yellow', 'magenta'] * 2
    linestyles = [':', '--', '-.', '-'] * 3
    plt.figure()
    for clf, label, clr, lstyle in zip(clf_list, clf_names, colors, linestyles):
        mean_auc, mean_tpr, mean_fpr = self.get_auc(splitter, num_splits, clf)
        # Gini coefficient relates to AUC as Gini = 2 * AUC - 1
        legend_string = "{}: Gini = {:.2f}".format(label, 2 * mean_auc - 1)
        plt.plot(mean_fpr, mean_tpr, color=clr, linestyle=lstyle,
                 label=legend_string, lw=2)
    # plot the ROC curve for the ensemble
    mean_auc, mean_tpr, mean_fpr = self.get_auc(splitter, num_splits,
                                                self.estimator)
    ensemble_classifier = _name_estimators([self.estimator])[0][0]
    legend_string = "{}: Gini = {:.2f}".format(ensemble_classifier,
                                               2 * mean_auc - 1)
    plt.plot(mean_fpr, mean_tpr, color='red', linestyle='-',
             label=legend_string, lw=2)
    # diagonal reference line for a random classifier
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray', linewidth=2)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('false positive rate')
    plt.ylabel('true positive rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    plt.grid()
    plt.tight_layout()
def __init__(self, classifiers, meta_classifier, use_probas=False,
             n_folds=2, use_features_in_secondary=False, stratify=True,
             random_state=None, shuffle=True, verbose=0):
    self.classifiers = classifiers
    self.meta_classifier = meta_classifier
    self.named_classifiers = {
        key: value for key, value in _name_estimators(classifiers)
    }
    self.named_meta_classifier = {
        'meta-%s' % key: value
        for key, value in _name_estimators([meta_classifier])
    }
    self.use_probas = use_probas
    self.verbose = verbose
    self.n_folds = n_folds
    self.use_features_in_secondary = use_features_in_secondary
    self.stratify = stratify
    self.shuffle = shuffle
    self.random_state = random_state
def fit(self, X, y):
    """Learn weight coefficients from training data for each classifier.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape = [n_samples]
        Target values.

    Returns
    -------
    self : object
    """
    if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
        raise NotImplementedError('Multilabel and multi-output'
                                  ' classification is not supported.')
    if self.voting not in ('soft', 'hard'):
        raise ValueError("Voting must be 'soft' or 'hard'; got (voting=%r)"
                         % self.voting)
    if self.weights and len(self.weights) != len(self.clfs):
        raise ValueError('Number of classifiers and weights must be equal'
                         '; got %d weights, %d clfs'
                         % (len(self.weights), len(self.clfs)))
    self.le_ = LabelEncoder()
    self.le_.fit(y)
    self.classes_ = self.le_.classes_
    self.clfs_ = [clone(clf) for clf in self.clfs]
    if self.verbose > 0:
        print("Fitting %d classifiers..." % (len(self.clfs)))
    for clf in self.clfs_:
        if self.verbose > 0:
            i = self.clfs_.index(clf) + 1
            print("Fitting clf%d: %s (%d/%d)"
                  % (i, _name_estimators((clf,))[0][0], i, len(self.clfs_)))
        if self.verbose > 2:
            if hasattr(clf, 'verbose'):
                clf.set_params(verbose=self.verbose - 2)
        if self.verbose > 1:
            print(_name_estimators((clf,))[0][1])
        clf.fit(X, self.le_.transform(y))
    return self
def __init__(self, regressors, meta_regressor, verbose=0):
    self.regressors = regressors
    self.meta_regressor = meta_regressor
    self.named_regressors = {
        key: value for key, value in _name_estimators(regressors)
    }
    self.named_meta_regressor = {
        'meta-%s' % key: value
        for key, value in _name_estimators([meta_regressor])
    }
    self.verbose = verbose
def __init__(self, classifiers, class_labels=[], vote_method="majority_vote",
             weights=None):
    """
    Constructor (strictly speaking, the initializer).
    Attributes must be set under the same names as the arguments
    (required by the parent BaseEstimator class).

    [Input]
        classifiers : list
            List of classifier objects.
    """
    self.classifiers = classifiers
    self.class_labels = class_labels
    self.class_labels_ = class_labels
    self.weights = weights
    if classifiers is not None:
        self.__n_classifier = len(classifiers)
    else:
        self.__n_classifier = 0
    self.vote_method = vote_method
    if classifiers is not None:
        self.named_classifiers = {
            key: value for key, value in _name_estimators(classifiers)
        }
    else:
        self.named_classifiers = {}
def __init__(self, classifiers, vote='classlabel', weights=None):
    self.classifiers = classifiers
    self.named_classifiers = {
        key: value for key, value in _name_estimators(classifiers)
    }
    self.vote = vote
    self.weights = weights
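# For reference, a minimal sketch of what the _name_estimators helper used
# throughout these snippets returns (assuming the sklearn/mlxtend
# implementation): a list of (name, estimator) tuples, where each name is
# the lowercased class name and duplicate types get numeric suffixes.
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import _name_estimators

named = _name_estimators([LogisticRegression(),
                          DecisionTreeClassifier(),
                          DecisionTreeClassifier()])
# [('logisticregression', LogisticRegression()),
#  ('decisiontreeclassifier-1', DecisionTreeClassifier()),
#  ('decisiontreeclassifier-2', DecisionTreeClassifier())]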
def kfold_validate(self, estimator=None, score_method=None):
    """K-fold cross-validation.

    Valid scoring options are ['accuracy', 'adjusted_rand_score',
    'average_precision', 'f1', 'f1_macro', 'f1_micro', 'f1_samples',
    'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error',
    'neg_mean_squared_error', 'neg_median_absolute_error', 'precision',
    'precision_macro', 'precision_micro', 'precision_samples',
    'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro',
    'recall_samples', 'recall_weighted', 'roc_auc']
    """
    if estimator is None:
        estimator = self.estimator
    if score_method is None:
        score_method = self.score_method
    estimator_name = _name_estimators([estimator])[0][0]
    scores = cross_val_score(estimator=estimator, X=self.X, y=self.y,
                             cv=self.cv, scoring=score_method,
                             n_jobs=self.n_jobs)
    self._print_title(estimator_name)
    print("{:d}-Fold {} score: {:.3f} +/- {:.3f}".format(
        self.cv, score_method, np.mean(scores), np.std(scores)))
    return scores
def make_pipeline(*steps):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require,
    and does not permit, naming the estimators. Instead, their names will
    be set to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list
        List of estimators.

    Returns
    -------
    p : Pipeline

    Examples
    --------
    >>> from kenchi.outlier_detection import MiniBatchKMeans
    >>> from kenchi.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> scaler = StandardScaler()
    >>> det = MiniBatchKMeans()
    >>> pipeline = make_pipeline(scaler, det)
    """
    return Pipeline(_name_estimators(steps))
def make_union(*transformers, **kwargs):
    """Construct a FeatureUnion from the given transformers.

    This is a shorthand for the FeatureUnion constructor; it does not
    require, and does not permit, naming the transformers. Instead, they
    will be given names automatically based on their types. It also does
    not allow weighting.

    Parameters
    ----------
    *transformers : list of estimators

    n_jobs : int or None, optional (default=None)
        Number of jobs to run in parallel. ``None`` means 1 unless in a
        :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors. See :term:`Glossary <n_jobs>` for more details.

    Returns
    -------
    f : FeatureUnion
    """
    n_jobs = kwargs.pop('n_jobs', None)
    if kwargs:
        # We do not currently support `transformer_weights` as we may want
        # to change its type spec in make_union
        raise TypeError('Unknown keyword arguments: "{}"'.format(
            list(kwargs.keys())[0]))
    return FeatureUnion(_name_estimators(transformers), n_jobs=n_jobs)
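# A brief usage sketch for the helper above (transformer choices are
# illustrative only):
from sklearn.decomposition import PCA, TruncatedSVD

union = make_union(PCA(n_components=2), TruncatedSVD(n_components=2),
                   n_jobs=2)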
def make_pipeline(*steps):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require,
    and does not permit, naming the estimators. Instead, their names will
    be set to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    See Also
    --------
    sklearn.pipeline.Pipeline : Class for creating a pipeline of
        transforms with a final estimator.

    Examples
    --------
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.preprocessing import StandardScaler
    >>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
    Pipeline(steps=[('standardscaler', StandardScaler()),
                    ('gaussiannb', GaussianNB())])

    Returns
    -------
    p : Pipeline
    """
    return Pipeline(_name_estimators(steps))
def make_pipeline(*steps, **kwargs):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require,
    and does not permit, naming the estimators. Instead, their names will
    be set to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the fitted transformers of the pipeline. By default,
        no caching is performed. If a string is given, it is the path to
        the caching directory. Enabling caching triggers a clone of the
        transformers before fitting. Therefore, the transformer instance
        given to the pipeline cannot be inspected directly. Use the
        attribute ``named_steps`` or ``steps`` to inspect estimators within
        the pipeline. Caching the transformers is advantageous when fitting
        is time consuming.

    Returns
    -------
    p : Pipeline
    """
    memory = kwargs.pop('memory', None)
    if kwargs:
        raise TypeError('Unknown keyword arguments: "{}"'.format(
            list(kwargs.keys())[0]))
    return Pipeline(_name_estimators(steps), memory=memory)
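# A usage sketch for the memory kwarg handled above: cache fitted
# transformers in a temporary directory (path is illustrative).
import tempfile
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

cachedir = tempfile.mkdtemp()
pipe = make_pipeline(StandardScaler(), LogisticRegression(),
                     memory=cachedir)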
def __init__(self, clfs, voting='hard', weights=None, verbose=0):
    self.clfs = clfs
    self.named_clfs = {key: value for key, value in _name_estimators(clfs)}
    self.voting = voting
    self.weights = weights
    self.verbose = verbose
def __init__(self, regressors, weights=None, fitting=None, clone=False,
             debug=False):
    """
    Args:
        regressors : list <regressor object>
            List of regressor objects.
        weights : list <float>
            List of weights for each regressor (attribute with the same
            name as the __init__() argument).
        fitting : list <bool>
            List of flags indicating whether to fit each regressor.
    """
    self.regressors = regressors
    self.fitted_regressors = regressors
    self.weights = weights
    self.clone = clone
    self.debug = debug
    if regressors is not None:
        self.n_classifier = len(regressors)
    else:
        self.n_classifier = 0
    # names of the objects supplied via regressors
    if regressors is not None:
        self.named_regressors = {
            key: value for key, value in _name_estimators(regressors)
        }
    else:
        self.named_regressors = {}
    if self.debug:
        for i, named_regressor in enumerate(self.named_regressors):
            print("name {} : {}".format(
                i, self.named_regressors[named_regressor]))
def __init__(self, regressors, final_regressors, second_regressors=None,
             n_splits=4, clone=False, seed=72):
    self.regressors = regressors
    self.fitted_regressors = regressors
    self.final_regressors = final_regressors
    self.second_regressors = second_regressors
    self.fitted_second_regressors = second_regressors
    self.n_classifier = len(regressors)
    if second_regressors is not None:
        self.n_second_regressors = len(second_regressors)
    else:
        self.n_second_regressors = 0
    self.n_splits = n_splits
    self.clone = clone
    self.seed = seed
    self.accuracy = None
    # names of the objects supplied via regressors
    if regressors is not None:
        self.named_regressors = {
            key: value for key, value in _name_estimators(regressors)
        }
    else:
        self.named_regressors = {}
    for i, named_regressor in enumerate(self.named_regressors):
        print("name {} : {}".format(
            i, self.named_regressors[named_regressor]))
def __init__(self, classifiers, vote='classlabel', weights=None):
    self.classifiers = classifiers  # list of classifiers
    self.vote = vote  # 'probability' or 'classlabel'
    self.named_classifiers = {
        key: value for key, value in _name_estimators(classifiers)
    }
    self.weights = weights  # weights for each of the classifiers
def __init__(self, classifiers, meta_classifier, use_probas=False,
             verbose=0):
    self.classifiers = classifiers
    self.meta_classifier = meta_classifier
    self.named_classifiers = {
        key: value for key, value in _name_estimators(classifiers)
    }
    self.named_meta_classifier = {
        'meta-%s' % key: value
        for key, value in _name_estimators([meta_classifier])
    }
    self.use_probas = use_probas
    self.verbose = verbose
def __init__(self, clfs, voting, weights=None, threshold=None):
    self.clfs = clfs
    self.named_clfs = {key: value for key, value in _name_estimators(clfs)}
    self.voting = voting
    # compare with ==, not `is`: identity checks on string literals are
    # implementation-dependent and unreliable
    if voting == 'weighted':
        self.combiner = WeightedVote(weights=weights, threshold=threshold)
    elif voting == 'majority':
        self.combiner = MajorityVote()
    else:
        raise AttributeError('Unrecognized voting method')
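# WeightedVote and MajorityVote are defined elsewhere in the source
# project; the sketch below is a hypothetical stand-in showing how a
# majority-vote combiner can be implemented with NumPy.
import numpy as np

class MajorityVoteSketch:
    """Hypothetical combiner: per-sample plurality over label votes."""

    def combine(self, predictions):
        # predictions: (n_classifiers, n_samples) array of integer labels
        return np.apply_along_axis(
            lambda votes: np.bincount(votes).argmax(),
            axis=0, arr=predictions)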
def __init__(self, clfs, voting='hard', weights=None):
    """
    voting: if 'hard', uses predicted class labels for majority rule
    voting; if 'soft', predicts the class label based on the argmax of
    the sums of the predicted probabilities
    """
    self.clfs = clfs
    # _name_estimators([LogisticRegression()]) ==>
    #     [('logisticregression', LogisticRegression(C=1.0, ...))]
    self.named_clfs = {key: value for key, value in _name_estimators(clfs)}
    self.voting = voting
    self.weights = weights
def __init__(self, classifiers, vote='classlabel', weights=None):
    self.classifiers = classifiers
    # _name_estimators takes the list of classifiers and splits each one
    # into a name (the lowercased class name that sklearn assigns) and
    # the classifier object itself, returning a list of tuples.
    self.named_classifiers = {
        key: value for key, value in _name_estimators(classifiers)
    }
    self.vote = vote
    self.weights = weights
def make_pipeline(*steps, **kwargs):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require,
    and does not permit, naming the estimators. Instead, their names will
    be set to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the fitted transformers of the pipeline. By default,
        no caching is performed. If a string is given, it is the path to
        the caching directory. Enabling caching triggers a clone of the
        transformers before fitting. Therefore, the transformer instance
        given to the pipeline cannot be inspected directly. Use the
        attribute ``named_steps`` or ``steps`` to inspect estimators within
        the pipeline. Caching the transformers is advantageous when fitting
        is time consuming.

    verbose : boolean, optional (default=False)
        If True, the time elapsed while fitting each step will be printed
        as it is completed.

    Returns
    -------
    p : Pipeline

    See also
    --------
    imblearn.pipeline.Pipeline : Class for creating a pipeline of
        transforms with a final estimator.

    Examples
    --------
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.preprocessing import StandardScaler
    >>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
    ... # doctest: +NORMALIZE_WHITESPACE
    Pipeline(memory=None,
             steps=[('standardscaler',
                     StandardScaler(copy=True, with_mean=True,
                                    with_std=True)),
                    ('gaussiannb',
                     GaussianNB(priors=None, var_smoothing=1e-09))],
             verbose=False)
    """
    memory = kwargs.pop("memory", None)
    verbose = kwargs.pop('verbose', False)
    if kwargs:
        raise TypeError('Unknown keyword arguments: "{}"'.format(
            list(kwargs.keys())[0]))
    return Pipeline(pipeline._name_estimators(steps), memory=memory,
                    verbose=verbose)
def make_union(*transformers, **kwargs):
    n_jobs = kwargs.pop('n_jobs', None)
    verbose = kwargs.pop('verbose', False)
    if kwargs:
        # We do not currently support `transformer_weights` as we may want
        # to change its type spec in make_union
        raise TypeError('Unknown keyword arguments: "{}"'.format(
            list(kwargs.keys())[0]))
    return PandasFeatureUnion(_name_estimators(transformers), n_jobs=n_jobs,
                              verbose=verbose)
def fit(self, X, y):
    """Learn weight coefficients from training data for each regressor.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape = [n_samples]
        Target values.

    Returns
    -------
    self : object
    """
    self.regr_ = [clone(regr) for regr in self.regressors]
    self.meta_regr_ = clone(self.meta_regressor)
    if self.verbose > 0:
        print("Fitting %d regressors..." % (len(self.regressors)))
    for regr in self.regr_:
        if self.verbose > 0:
            i = self.regr_.index(regr) + 1
            print("Fitting regressor%d: %s (%d/%d)"
                  % (i, _name_estimators((regr,))[0][0], i, len(self.regr_)))
        if self.verbose > 2:
            if hasattr(regr, 'verbose'):
                regr.set_params(verbose=self.verbose - 2)
        if self.verbose > 1:
            print(_name_estimators((regr,))[0][1])
        regr.fit(X, y)
    meta_features = self._predict_meta_features(X)
    self.meta_regr_.fit(meta_features, y)
    return self
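# _predict_meta_features is referenced above but not shown; a plausible
# sketch (assumption: it mirrors mlxtend's StackingRegressor) stacks each
# fitted base regressor's predictions as columns of the meta-feature
# matrix fed to the meta-regressor.
import numpy as np

def _predict_meta_features(self, X):
    return np.column_stack([regr.predict(X) for regr in self.regr_])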
def _get_transformer_list(estimators):
    """Construct (name, trans, column) tuples from a list of
    (transformer, columns) tuples."""
    transformers, columns = zip(*estimators)
    names, _ = zip(*_name_estimators(transformers))
    transformer_list = list(zip(names, transformers, columns))
    return transformer_list
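# Illustrative input/output for the helper above (transformers and column
# names chosen arbitrarily):
from sklearn.preprocessing import OneHotEncoder, StandardScaler

_get_transformer_list([(StandardScaler(), ['age']),
                       (OneHotEncoder(), ['city'])])
# [('standardscaler', StandardScaler(), ['age']),
#  ('onehotencoder', OneHotEncoder(), ['city'])]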
def make_pipeline(*steps):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require,
    and does not permit, naming the estimators. Instead, their names will
    be set to the lowercase of their types automatically.

    Returns
    -------
    p : Pipeline
    """
    return Pipeline(pipeline._name_estimators(steps))
def get_all_performance_metrics(self, metrics=None):
    """Get metrics for all classifiers."""
    (clf_list, clf_names) = self.get_component_classifiers()
    ensemble_classifier = _name_estimators([self.estimator])[0][0]
    for clf, label in zip(clf_list, clf_names):
        print("\n{}\n{}\n{}\n".format("*" * 60, label, "*" * 60))
        self.get_performance_metrics(metrics, clf)
    print("\n{}\n{}\n{}\n".format("*" * 60, ensemble_classifier, "*" * 60))
    self.get_performance_metrics(metrics, self.estimator)
def fit(self, X, y):
    """Fit ensemble classifiers and the meta-classifier.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape = [n_samples]
        Target values.

    Returns
    -------
    self : object
    """
    self.clfs_ = [clone(clf) for clf in self.classifiers]
    self.meta_clf_ = clone(self.meta_classifier)
    if self.verbose > 0:
        print("Fitting %d classifiers..." % (len(self.classifiers)))
    for clf in self.clfs_:
        if self.verbose > 0:
            i = self.clfs_.index(clf) + 1
            print("Fitting classifier%d: %s (%d/%d)"
                  % (i, _name_estimators((clf,))[0][0], i, len(self.clfs_)))
        if self.verbose > 2:
            if hasattr(clf, 'verbose'):
                clf.set_params(verbose=self.verbose - 2)
        if self.verbose > 1:
            print(_name_estimators((clf,))[0][1])
        clf.fit(X, y)
    meta_features = self._predict_meta_features(X)
    self.meta_clf_.fit(meta_features, y)
    return self
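# The classifier-side _predict_meta_features is likewise defined
# elsewhere; a hedged sketch follows (assumption: it mirrors mlxtend's
# StackingClassifier with the use_probas option).
import numpy as np

def _predict_meta_features(self, X):
    if self.use_probas:
        # concatenate per-class probabilities from every base classifier
        return np.hstack([clf.predict_proba(X) for clf in self.clfs_])
    # otherwise use the raw predicted labels as meta-features
    return np.column_stack([clf.predict(X) for clf in self.clfs_])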
def __init__(self, classifiers, vote='probability', weights=None,
             method='majority_voting'):
    self.classifiers = classifiers
    self.named_classifiers = {
        k: v for k, v in _name_estimators(classifiers)
    }
    self.vote = vote
    self.weights = weights
    self.method = method
def __init__(self, classifiers, weights=None, fitting=None,
             vote_method="majority_vote"):
    """
    Args:
        classifiers : list <classifier object>
            List of classifier objects.
        weights : list <float>
            List of weights for each classifier (attribute with the same
            name as the __init__() argument).
        fitting : list <bool>
            List of flags indicating whether to fit each classifier.
        vote_method : str ("majority_vote" or "probability_vote")
            Final decision method of the ensemble (attribute with the same
            name as the __init__() argument).
            "majority_vote" : decide by majority vote of the weak
                classifiers (argmax over class-label votes).
            "probability_vote" : decide by the weighted result of the weak
                classifiers (argmax over class membership probabilities).
    """
    self.classifiers = classifiers
    self.fitted_classifiers = classifiers
    self.weights = weights
    self.n_classes = 0
    if classifiers is not None:
        self.n_classifier = len(classifiers)
    else:
        self.n_classifier = 0
    self.vote_method = vote_method
    self.encoder = LabelEncoder()
    # names of the objects supplied via classifiers
    if classifiers is not None:
        self.named_classifiers = {
            key: value for key, value in _name_estimators(classifiers)
        }
    else:
        self.named_classifiers = {}
    for i, named_classifier in enumerate(self.named_classifiers):
        print("name {} : {}".format(
            i, self.named_classifiers[named_classifier]))
    # default: fit every classifier (assign the attribute, not a local)
    if fitting is None:
        self.fitting = [True] * len(self.classifiers)
    else:
        self.fitting = fitting
def __init__(self, classifiers, vote='classlabel', weights=None):
    """Constructor."""
    self.classifiers = classifiers
    self.named_classifiers = {
        key: value for key, value in _name_estimators(classifiers)
    }
    self.vote = vote
    self.weights = weights
    self.lablenc_ = LabelEncoder()
    self.classifiers_ = []
    self.classes_ = []
def __init__(
    self,
    estimator=None,
    output_dtype=float,
    output_dims=((None, np.nan),),
    fit_input="data",
    transform_input="data",
    estimator_name=None,
    model_path=None,
    features_dir=None,
    extension=".hdf5",
    save_func=None,
    load_func=None,
    dataset_map=None,
    input_dask_array=False,
    fit_kwargs=None,
    **kwargs,
):
    super().__init__(**kwargs)
    self.estimator = estimator
    self.output_dtype = output_dtype
    if not all(len(d) == 2 for d in output_dims):
        raise ValueError(
            "output_dims must be an iterable of size 2 tuples "
            f"(dim_name, dim_size), not {output_dims}"
        )
    self.output_dims = output_dims
    self.fit_input = fit_input
    self.transform_input = transform_input
    if estimator_name is None:
        estimator_name = _name_estimators([estimator])[0][0]
    self.estimator_name = estimator_name
    self.model_path = model_path
    self.features_dir = features_dir
    self.extension = extension
    estimator_save_fn = (
        None
        if estimator is None
        else estimator._get_tags().get("bob_features_save_fn")
    )
    estimator_load_fn = (
        None
        if estimator is None
        else estimator._get_tags().get("bob_features_load_fn")
    )
    self.save_func = save_func or estimator_save_fn or save
    self.load_func = load_func or estimator_load_fn or load
    self.dataset_map = dataset_map
    self.input_dask_array = input_dask_array
    self.fit_kwargs = fit_kwargs or {}
def make_debug_pipeline(*steps, **kwargs):
    """Construct a DebugPipeline from the given estimators.

    This is a shorthand for the DebugPipeline constructor; it does not
    require, and does not permit, naming the estimators. Instead, their
    names will be set to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the fitted transformers of the pipeline. By default,
        no caching is performed. If a string is given, it is the path to
        the caching directory. Enabling caching triggers a clone of the
        transformers before fitting. Therefore, the transformer instance
        given to the pipeline cannot be inspected directly. Use the
        attribute ``named_steps`` or ``steps`` to inspect estimators within
        the pipeline. Caching the transformers is advantageous when fitting
        is time consuming.

    verbose : boolean, default=False
        If True, the time elapsed while fitting each step will be printed
        as it is completed.

    log_callback : string, default=None
        The callback function that logs information in between each
        intermediate step. Defaults to None. If set to ``'default'``,
        :func:`default_log_callback` is used. See
        :func:`default_log_callback` for an example.

    See Also
    --------
    sklego.pipeline.DebugPipeline : Class for creating a pipeline of
        transforms with a final estimator.

    Examples
    --------
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.preprocessing import StandardScaler
    >>> make_debug_pipeline(StandardScaler(), GaussianNB(priors=None))
    DebugPipeline(steps=[('standardscaler', StandardScaler()),
                         ('gaussiannb', GaussianNB())])

    Returns
    -------
    p : DebugPipeline
    """
    memory = kwargs.pop('memory', None)
    verbose = kwargs.pop('verbose', False)
    log_callback = kwargs.pop('log_callback', None)
    if kwargs:
        raise TypeError('Unknown keyword arguments: "{}"'
                        .format(list(kwargs.keys())[0]))
    return DebugPipeline(_name_estimators(steps), memory=memory,
                         verbose=verbose, log_callback=log_callback)
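# A usage sketch for the log_callback option documented above (estimator
# choices illustrative; relies only on the function defined here):
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler

debug_pipe = make_debug_pipeline(StandardScaler(), GaussianNB(),
                                 log_callback='default')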
def make_pipeline(*steps, **kwargs):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require,
    and does not permit, naming the estimators. Instead, their names will
    be set to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the fitted transformers of the pipeline. By default,
        no caching is performed. If a string is given, it is the path to
        the caching directory. Enabling caching triggers a clone of the
        transformers before fitting. Therefore, the transformer instance
        given to the pipeline cannot be inspected directly. Use the
        attribute ``named_steps`` or ``steps`` to inspect estimators within
        the pipeline. Caching the transformers is advantageous when fitting
        is time consuming.

    Returns
    -------
    p : Pipeline

    See also
    --------
    imblearn.pipeline.Pipeline : Class for creating a pipeline of
        transforms with a final estimator.

    Examples
    --------
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.preprocessing import StandardScaler
    >>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
    ... # doctest: +NORMALIZE_WHITESPACE
    Pipeline(memory=None,
             steps=[('standardscaler',
                     StandardScaler(copy=True, with_mean=True,
                                    with_std=True)),
                    ('gaussiannb',
                     GaussianNB(priors=None, var_smoothing=1e-09))])
    """
    memory = kwargs.pop('memory', None)
    if kwargs:
        raise TypeError('Unknown keyword arguments: "{}"'
                        .format(list(kwargs.keys())[0]))
    return Pipeline(pipeline._name_estimators(steps), memory=memory)
def make_sparkunion(*transformers):
    """Construct a SparkFeatureUnion from the given transformers.

    This is a shorthand for the SparkFeatureUnion constructor; it does not
    require, and does not permit, naming the transformers. Instead, they
    will be given names automatically based on their types. It also does
    not allow weighting.

    Examples
    --------
    >>> from sklearn.decomposition import PCA, TruncatedSVD
    >>> make_sparkunion(PCA(), TruncatedSVD())
    ... # doctest: +NORMALIZE_WHITESPACE
    SparkFeatureUnion(n_jobs=1,
        transformer_list=[('pca',
                           PCA(copy=True, n_components=None, whiten=False)),
                          ('truncatedsvd',
                           TruncatedSVD(algorithm='randomized',
                                        n_components=2, n_iter=5,
                                        random_state=None, tol=0.0))],
        transformer_weights=None)

    Returns
    -------
    f : SparkFeatureUnion
    """
    return SparkFeatureUnion(_name_estimators(transformers))
def __init__(self, clfs, voting="hard", weights=None): self.clfs = clfs self.named_clfs = {key: value for key, value in _name_estimators(clfs)} self.voting = voting self.weights = weights
def transform(self, X, y=None):
    xform_data = self.transform_.transform(X, y)
    return np.append(X, xform_data, axis=1)


class LogExpPipeline(Pipeline):
    # fit on log1p(y) and invert with expm1 at predict time, so the
    # wrapped model works in log-target space
    def fit(self, X, y):
        super(LogExpPipeline, self).fit(X, np.log1p(y))

    def predict(self, X):
        return np.expm1(super(LogExpPipeline, self).predict(X))

#
# Model/pipeline with scaling,pca,svm
# knn
knn_pipe = LogExpPipeline(_name_estimators([RobustScaler(),
                                            KNeighborsRegressor(
                                                n_neighbors=15,
                                                metric='cityblock')]))
#
svm_pipe = LogExpPipeline(_name_estimators([RobustScaler(),
                                            SVR(kernel='rbf', C=30,
                                                epsilon=0.05)]))

# results = cross_val_score(svm_pipe, train, y_train, cv=5, scoring='r2')
# print("SVM score: %.4f (%.4f)" % (results.mean(), results.std()))
# exit()

#
# Model/pipeline with scaling,pca,ElasticNet
#
en = ElasticNet(alpha=0.01, l1_ratio=0.9)

#
# XGBoost model
#
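# The log1p/expm1 target transform implemented by LogExpPipeline above
# can also be expressed with scikit-learn's built-in wrapper; a sketch,
# assuming sklearn >= 0.20, with estimator choices mirroring svm_pipe:
import numpy as np
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.svm import SVR

log_svm = TransformedTargetRegressor(
    regressor=make_pipeline(RobustScaler(),
                            SVR(kernel='rbf', C=30, epsilon=0.05)),
    func=np.log1p, inverse_func=np.expm1)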
def transform(self, X, y=None):
    xform_data = self.transform_.transform(X, y)
    return np.append(X, xform_data, axis=1)


class LogExpPipeline(Pipeline):
    # classifier variant: no log/exp target transform, just pass-through
    def fit(self, X, y):
        super(LogExpPipeline, self).fit(X, y)

    def predict(self, X):
        return super(LogExpPipeline, self).predict(X)

#
# Model/pipeline with scaling,pca,svm
# knn
knn_pipe = LogExpPipeline(_name_estimators([RobustScaler(),
                                            KNeighborsClassifier(
                                                n_neighbors=15,
                                                metric='cityblock')]))
#
svm_pipe = LogExpPipeline(_name_estimators([RobustScaler(),
                                            SVC(kernel='rbf', C=14)]))

# results = cross_val_score(svm_pipe, train, y_train, cv=5, scoring='r2')
# print("SVM score: %.4f (%.4f)" % (results.mean(), results.std()))
# exit()

#
# XGBoost model
#
xgb_model = xgb.XGBClassifier(max_depth=4, learning_rate=0.0045,
                              subsample=0.921, nthread=6,
                              objective='multi:softmax', n_estimators=500)
def __init__(self, classifiers):
    self.classifiers = classifiers
    self.named_classifiers = {
        key: value for key, value in _name_estimators(classifiers)
    }
def make_alpha_pipeline(*steps):
    return AlphaPipeline(_name_estimators(steps))
def make_dataframe_pipeline(steps):
    """Construct a DataFramePipeline from the given estimators."""
    return DataFramePipeline(_name_estimators(steps))
def make_transformer_pipeline(*steps):
    """Construct a TransformerPipeline from the given estimators."""
    return TransformerPipeline(_name_estimators(steps))