Example #1
        def plot_all_roc_curve(self, splitter=None, num_splits=None):
            "plot ROC curves for all estimators"

            (clf_list, clf_names) = self.get_component_classifiers()

            colors = ['black', 'orange', 'blue', 'green', 'yellow', 'magenta'] * 2
            linestyles = [':', '--', '-.', '-'] * 3
            plt.figure()
            for clf, label, clr, ls in zip(clf_list, clf_names, colors, linestyles):
                mean_auc, mean_tpr, mean_fpr = self.get_auc(splitter, num_splits, clf)

                legendString = "{}: Gini = {:.2f}".format(label, 2*mean_auc-1)
                plt.plot(mean_fpr, mean_tpr, color=clr, linestyle=ls,
                         label=legendString, lw=2)

            # plot the roc curve for ensemble
            mean_auc, mean_tpr, mean_fpr = self.get_auc(splitter, num_splits, self.estimator)
            ensemble_classifier = _name_estimators([self.estimator])[0][0]

            legendString = "{}: Gini = {:.2f}".format(ensemble_classifier, 2*mean_auc-1)
            plt.plot(mean_fpr, mean_tpr, color='red', linestyle='-',
                     label=legendString, lw=2)

            # random line
            plt.plot([0, 1], [0, 1], linestyle='--', color='gray', linewidth=2)

            plt.xlim([-0.05, 1.05])
            plt.ylim([-0.05, 1.05])
            plt.xlabel('false positive rate')
            plt.ylabel('true positive rate')
            plt.title('Receiver Operating Characteristic')
            plt.legend(loc="lower right")
            plt.grid()
            plt.tight_layout()
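
The legend strings above report the Gini coefficient, derived from the AUC as Gini = 2*AUC - 1. A minimal, self-contained sketch of that relation (not part of the class above; plain scikit-learn):

from sklearn.metrics import roc_auc_score

y_true = [0, 0, 1, 1]
y_score = [0.1, 0.4, 0.35, 0.8]

auc = roc_auc_score(y_true, y_score)   # area under the ROC curve
gini = 2 * auc - 1                     # 0 for a random ranking, 1 for a perfect one
print("AUC = {:.2f}, Gini = {:.2f}".format(auc, gini))   # AUC = 0.75, Gini = 0.50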
Example #2
    def __init__(self,
                 classifiers,
                 meta_classifier,
                 use_probas=False,
                 n_folds=2,
                 use_features_in_secondary=False,
                 stratify=True,
                 random_state=None,
                 shuffle=True,
                 verbose=0):

        self.classifiers = classifiers
        self.meta_classifier = meta_classifier
        self.named_classifiers = {
            key: value
            for key, value in _name_estimators(classifiers)
        }
        self.named_meta_classifier = {
            'meta-%s' % key: value
            for key, value in _name_estimators([meta_classifier])
        }
        self.use_probas = use_probas
        self.verbose = verbose
        self.n_folds = n_folds
        self.use_features_in_secondary = use_features_in_secondary
        self.stratify = stratify
        self.shuffle = shuffle
        self.random_state = random_state
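
The two dict comprehensions above only wrap _name_estimators, which maps each estimator to its lowercased class name. A minimal sketch of that naming behavior; note that _name_estimators is a private scikit-learn helper, so its import location may change between versions:

from sklearn.pipeline import _name_estimators   # private helper
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

named = {key: value for key, value in _name_estimators([LogisticRegression(),
                                                        DecisionTreeClassifier()])}
print(sorted(named))        # ['decisiontreeclassifier', 'logisticregression']

named_meta = {'meta-%s' % key: value
              for key, value in _name_estimators([LogisticRegression()])}
print(sorted(named_meta))   # ['meta-logisticregression']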
Example #3
    def fit(self, X, y):
        """Learn weight coefficients from training data for each classifier.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : object

        """
        if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:
            raise NotImplementedError('Multilabel and multi-output'
                                      ' classification is not supported.')

        if self.voting not in ('soft', 'hard'):
            raise ValueError(
                "Voting must be 'soft' or 'hard'; got (voting=%r)" %
                self.voting)

        if self.weights and len(self.weights) != len(self.clfs):
            raise ValueError('Number of classifiers and weights must be equal'
                             '; got %d weights, %d clfs' %
                             (len(self.weights), len(self.clfs)))

        self.le_ = LabelEncoder()
        self.le_.fit(y)
        self.classes_ = self.le_.classes_
        self.clfs_ = [clone(clf) for clf in self.clfs]

        if self.verbose > 0:
            print("Fitting %d classifiers..." % (len(self.clfs)))

        for clf in self.clfs_:

            if self.verbose > 0:
                i = self.clfs_.index(clf) + 1
                print("Fitting clf%d: %s (%d/%d)" %
                      (i, _name_estimators((clf, ))[0][0], i, len(self.clfs_)))

            if self.verbose > 2:
                if hasattr(clf, 'verbose'):
                    clf.set_params(verbose=self.verbose - 2)

            if self.verbose > 1:
                print(_name_estimators((clf, ))[0][1])

            clf.fit(X, self.le_.transform(y))
        return self
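
The fit above label-encodes y and fits a clone of each classifier for a voting ensemble. For comparison, a minimal, self-contained sketch of the same idea using scikit-learn's built-in VotingClassifier (the dataset is illustrative):

from sklearn.datasets import load_iris
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
vote = VotingClassifier(
    estimators=[('lr', LogisticRegression(max_iter=1000)),
                ('dt', DecisionTreeClassifier(random_state=0))],
    voting='soft')          # 'soft' averages predict_proba; 'hard' votes on labels
vote.fit(X, y)
print("train accuracy: {:.3f}".format(vote.score(X, y)))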
Example #5
    def __init__(self, regressors, meta_regressor, verbose=0):

        self.regressors = regressors
        self.meta_regressor = meta_regressor
        self.named_regressors = {
            key: value
            for key, value in _name_estimators(regressors)
        }
        self.named_meta_regressor = {
            'meta-%s' % key: value
            for key, value in _name_estimators([meta_regressor])
        }
        self.verbose = verbose
    def __init__(self, classifiers, class_labels=[], vote_method="majority_vote", weights=None):
        """
        Constructor (strictly speaking, the initializer).
        Attributes must be stored under the same names as the arguments
        (required by the parent class, BaseEstimator).

        [Input]
            classifiers : list
                List of classifier objects.

        """
        self.classifiers = classifiers

        self.class_labels = class_labels
        self.class_labels_ = class_labels

        self.weights = weights

        if classifiers is not None:
            self.__n_classifier = len(classifiers)
        else:
            self.__n_classifier = 0

        self.vote_method = vote_method

        # Names of the objects passed in classifiers
        if classifiers is not None:
            self.named_classifiers = {key: value
                                      for key, value in _name_estimators(classifiers)}
        else:
            self.named_classifiers = {}

        return
    def __init__(self, classifiers, vote = 'classlabel', weights = None):

        self.classifiers = classifiers
        self.named_classifiers = { key:value for key, value
                                   in _name_estimators(classifiers) }
        self.vote = vote
        self.weights = weights
Example #9
    def kfold_validate(self, estimator=None, score_method=None):
        '''
            K-fold cross-validation.

            Valid options are ['accuracy', 'adjusted_rand_score', 'average_precision',
                'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss',
                'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error',
                'precision', 'precision_macro', 'precision_micro', 'precision_samples',
                'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro',
                'recall_samples', 'recall_weighted', 'roc_auc']

        '''

        if estimator is None:
            estimator = self.estimator
        if score_method is None:
            score_method = self.score_method

        estimator_name = _name_estimators([estimator])[0][0]

        scores = cross_val_score(estimator=estimator,
                                 X=self.X,
                                 y=self.y,
                                 cv=self.cv,
                                 scoring=score_method,
                                 n_jobs=self.n_jobs)

        self._print_title(estimator_name)
        print("{:d}-Fold {} score: {:.3f} +/- {:.3f}".format(self.cv, score_method,
                                                          np.mean(scores), np.std(scores)))
        return (scores)
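
A self-contained sketch of the underlying cross_val_score call; the dataset and classifier here are illustrative, not part of the class above:

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

X, y = load_breast_cancer(return_X_y=True)
scores = cross_val_score(estimator=LogisticRegression(max_iter=5000),
                         X=X, y=y, cv=10, scoring='roc_auc')
print("10-Fold roc_auc score: {:.3f} +/- {:.3f}".format(np.mean(scores),
                                                        np.std(scores)))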
Example #10
def make_pipeline(*steps):
    """Construct a Pipeline from the given estimators. This is a shorthand for
    the Pipeline constructor; it does not require, and does not permit, naming
    the estimators. Instead, their names will be set to the lowercase of their
    types automatically.

    Parameters
    ----------
    *steps : list
        List of estimators.

    Returns
    -------
    p : Pipeline

    Examples
    --------
    >>> from kenchi.outlier_detection import MiniBatchKMeans
    >>> from kenchi.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> scaler = StandardScaler()
    >>> det = MiniBatchKMeans()
    >>> pipeline = make_pipeline(scaler, det)
    """

    return Pipeline(_name_estimators(steps))
Example #11
def make_union(*transformers, **kwargs):
    """Construct a FeatureUnion from the given transformers.

    This is a shorthand for the FeatureUnion constructor; it does not require,
    and does not permit, naming the transformers. Instead, they will be given
    names automatically based on their types. It also does not allow weighting.

    Parameters
    ----------
    *transformers : list of estimators

    n_jobs : int or None, optional (default=None)
        Number of jobs to run in parallel.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Returns
    -------
    f : FeatureUnion

    """
    n_jobs = kwargs.pop('n_jobs', None)

    if kwargs:
        # We do not currently support `transformer_weights` as we may want to
        # change its type spec in make_union
        raise TypeError('Unknown keyword arguments: "{}"'.format(
            list(kwargs.keys())[0]))

    return FeatureUnion(_name_estimators(transformers), n_jobs=n_jobs)
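
A minimal usage sketch of this make_union variant, assuming FeatureUnion and _name_estimators are imported as in the snippet above:

from sklearn.decomposition import PCA, TruncatedSVD

union = make_union(PCA(n_components=2), TruncatedSVD(n_components=2), n_jobs=2)
print([name for name, _ in union.transformer_list])
# ['pca', 'truncatedsvd']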
Example #12
def make_pipeline(*steps):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require, and
    does not permit, naming the estimators. Instead, their names will be set
    to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    See Also
    --------
    sklearn.pipeline.Pipeline : Class for creating a pipeline of
        transforms with a final estimator.

    Examples
    --------
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.preprocessing import StandardScaler
    >>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
    Pipeline(steps=[('standardscaler', StandardScaler()),
                    ('gaussiannb', GaussianNB())])

    Returns
    -------
    p : Pipeline
    """
    return Pipeline(_name_estimators(steps))
Example #13
def make_pipeline(*steps, **kwargs):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require, and
    does not permit, naming the estimators. Instead, their names will be set
    to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the fitted transformers of the pipeline. By default,
        no caching is performed. If a string is given, it is the path to
        the caching directory. Enabling caching triggers a clone of
        the transformers before fitting. Therefore, the transformer
        instance given to the pipeline cannot be inspected
        directly. Use the attribute ``named_steps`` or ``steps`` to
        inspect estimators within the pipeline. Caching the
        transformers is advantageous when fitting is time consuming.

    Returns
    -------
    p : Pipeline

    """
    memory = kwargs.pop('memory', None)
    if kwargs:
        raise TypeError('Unknown keyword arguments: "{}"'.format(
            list(kwargs.keys())[0]))
    return Pipeline(_name_estimators(steps), memory=memory)
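
A minimal sketch of the memory keyword, assuming the make_pipeline above is in scope; the cache directory path is illustrative:

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipe = make_pipeline(StandardScaler(), LogisticRegression(),
                     memory='/tmp/pipeline_cache')
# Fitted transformers are cached under /tmp/pipeline_cache; refitting with the
# same data and parameters reuses the cached result instead of recomputing.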
Example #14
    def __init__(self, clfs, voting='hard', weights=None, verbose=0):

        self.clfs = clfs
        self.named_clfs = {key: value for key, value in _name_estimators(clfs)}
        self.voting = voting
        self.weights = weights
        self.verbose = verbose
Example #15
    def __init__(self, regressors, weights=None, fitting=None, clone=False, debug=False):
        """
        Args:
            regressors : list of regressor objects
                List of regressor objects.
            weights : list of float
                Weight assigned to each regressor; stored as an attribute
                under the same name as the __init__ argument.
            fitting : list of bool
                Flags indicating whether each regressor should be fitted.
        """
        self.regressors = regressors
        self.fitted_regressors = regressors
        self.weights = weights
        self.fitting = fitting  # stored under the argument name, per BaseEstimator convention
        self.clone = clone
        self.debug = debug
        if regressors is not None:
            self.n_classifier = len(regressors)
        else:
            self.n_classifier = 0

        # Names of the objects passed in regressors
        if regressors is not None:
            self.named_regressors = {key: value for key, value in _name_estimators(regressors)}
        else:
            self.named_regressors = {}

        if self.debug:
            for i, named_regressor in enumerate(self.named_regressors):
                print("name {} : {}".format(i, self.named_regressors[named_regressor]))

        return
Example #16
    def __init__(self, regressors, final_regressors, second_regressors=None, n_splits=4, clone=False, seed=72):
        self.regressors = regressors
        self.fitted_regressors = regressors
        self.final_regressors = final_regressors
        self.second_regressors = second_regressors
        self.fitted_second_regressors = second_regressors

        self.n_classifier = len(regressors)
        if second_regressors is not None:
            self.n_second_regressors = len(second_regressors)
        else:
            self.n_second_regressors = 0

        self.n_splits = n_splits
        self.clone = clone
        self.seed = seed
        self.accuracy = None

        # Names of the objects passed in regressors
        if regressors is not None:
            self.named_regressors = {key: value for key, value in _name_estimators(regressors)}
        else:
            self.named_regressors = {}

        for i, named_regressor in enumerate(self.named_regressors):
            print("name {} : {}".format(i, self.named_regressors[named_regressor]))

        return
 def __init__(self, classifiers, vote='classlabel', weights=None):
     self.classifiers = classifiers  # list of classifiers
     self.vote = vote  # 'probability' or 'classlabel'
     self.named_classifiers = {
         key: value
         for key, value in _name_estimators(classifiers)
     }
     self.weights = weights  # weights for each of the classifiers
Example #19
    def __init__(self,
                 classifiers,
                 meta_classifier,
                 use_probas=False,
                 verbose=0):

        self.classifiers = classifiers
        self.meta_classifier = meta_classifier
        self.named_classifiers = {
            key: value
            for key, value in _name_estimators(classifiers)
        }
        self.named_meta_classifier = {
            'meta-%s' % key: value
            for key, value in _name_estimators([meta_classifier])
        }
        self.use_probas = use_probas
        self.verbose = verbose
 def __init__(self, clfs, voting='hard', weights=None):
     """
         voting: if 'hard', uses predicted class labels for majority rule voting
                 if 'soft', predicts the class label based on the argmax of the sums of the predicted probabilities
     """
     self.clfs = clfs
     # _name_estimators([LogisticRegression()]) ==> [('logisticregression', LogisticRegression(C=1.0,  ....))]
     self.named_clfs = {key:value for key, value in _name_estimators(clfs)}
     self.voting = voting
     self.weights = weights
    def __init__(self, classifiers, vote='classlabel', weights=None):

        self.classifiers = classifiers
        # _name_estimators([...]) takes a list of classifiers and returns a
        # list of (name, object) tuples, where each name is the lowercased
        # class name that sklearn assigns to the classifier.
        self.named_classifiers = {key: value for key, value
                                  in _name_estimators(classifiers)}
        self.vote = vote
        self.weights = weights
Example #24
 def __init__(self, clfs, voting, weights=None, threshold=None):
     self.clfs = clfs
     self.named_clfs = {key: value for key, value in _name_estimators(clfs)}
     self.voting = voting
     # Compare strings with ==; `is` tests object identity and is unreliable here.
     if voting == 'weighted':
         self.combiner = WeightedVote(weights=weights, threshold=threshold)
     elif voting == 'majority':
         self.combiner = MajorityVote()
     else:
         raise AttributeError('Unrecognized voting method')
def make_pipeline(*steps, **kwargs):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require, and
    does not permit, naming the estimators. Instead, their names will be set
    to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the fitted transformers of the pipeline. By default,
        no caching is performed. If a string is given, it is the path to
        the caching directory. Enabling caching triggers a clone of
        the transformers before fitting. Therefore, the transformer
        instance given to the pipeline cannot be inspected
        directly. Use the attribute ``named_steps`` or ``steps`` to
        inspect estimators within the pipeline. Caching the
        transformers is advantageous when fitting is time consuming.

    verbose : boolean, optional (default=False)
        If True, the time elapsed while fitting each step will be printed as it
        is completed.

    Returns
    -------
    p : Pipeline

    See also
    --------
    imblearn.pipeline.Pipeline : Class for creating a pipeline of
        transforms with a final estimator.

    Examples
    --------
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.preprocessing import StandardScaler
    >>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
    ... # doctest: +NORMALIZE_WHITESPACE
    Pipeline(memory=None,
             steps=[('standardscaler',
                     StandardScaler(copy=True, with_mean=True, with_std=True)),
                    ('gaussiannb',
                     GaussianNB(priors=None, var_smoothing=1e-09))],
             verbose=False)
    """
    memory = kwargs.pop("memory", None)
    verbose = kwargs.pop('verbose', False)
    if kwargs:
        raise TypeError('Unknown keyword arguments: "{}"'.format(
            list(kwargs.keys())[0]))
    return Pipeline(pipeline._name_estimators(steps),
                    memory=memory,
                    verbose=verbose)
def make_union(*transformers, **kwargs):
    n_jobs = kwargs.pop('n_jobs', None)
    verbose = kwargs.pop('verbose', False)
    if kwargs:
        # We do not currently support `transformer_weights` as we may want to
        # change its type spec in make_union
        raise TypeError('Unknown keyword arguments: "{}"'.format(
            list(kwargs.keys())[0]))
    return PandasFeatureUnion(_name_estimators(transformers),
                              n_jobs=n_jobs,
                              verbose=verbose)
Example #27
    def fit(self, X, y):
        """Learn weight coefficients from training data for each regressor.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : object

        """
        self.regr_ = [clone(regr) for regr in self.regressors]
        self.meta_regr_ = clone(self.meta_regressor)
        if self.verbose > 0:
            print("Fitting %d regressors..." % (len(self.regressors)))

        for regr in self.regr_:

            if self.verbose > 0:
                i = self.regr_.index(regr) + 1
                print("Fitting regressor%d: %s (%d/%d)" %
                      (i, _name_estimators(
                          (regr, ))[0][0], i, len(self.regr_)))

            if self.verbose > 2:
                if hasattr(regr, 'verbose'):
                    regr.set_params(verbose=self.verbose - 2)

            if self.verbose > 1:
                print(_name_estimators((regr, ))[0][1])

            regr.fit(X, y)

        meta_features = self._predict_meta_features(X)
        self.meta_regr_.fit(meta_features, y)
        return self
def _get_transformer_list(estimators):
    """
    Construct (name, trans, column) tuples from list
    """
    message = ('`make_column_transformer` expects (transformer, columns)')

    transformers, columns = zip(*estimators)

    names, _ = zip(*_name_estimators(transformers))

    transformer_list = list(zip(names, transformers, columns))
    return transformer_list
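
A sketch of the expected input and output of _get_transformer_list, assuming scikit-learn transformers: (transformer, columns) pairs go in, (name, transformer, columns) triples come out:

from sklearn.preprocessing import OneHotEncoder, StandardScaler

tlist = _get_transformer_list([(StandardScaler(), ['age']),
                               (OneHotEncoder(), ['city'])])
print([(name, cols) for name, _, cols in tlist])
# [('standardscaler', ['age']), ('onehotencoder', ['city'])]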
Example #30
def make_pipeline(*steps):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require, and
    does not permit, naming the estimators. Instead, their names will be set
    to the lowercase of their types automatically.

    Returns
    -------
    p : Pipeline
    """
    return Pipeline(pipeline._name_estimators(steps))
Example #31
        def get_all_performance_metrics(self, metrics=None):
            "get metrics for all classifiers"

            (clf_list, clf_names) = self.get_component_classifiers()
            ensemble_classifier = _name_estimators([self.estimator])[0][0]

            for clf, label in zip(clf_list, clf_names):
                print("\n{}\n{}\n{}\n".format("*"*60, label, "*"*60))
                self.get_performance_metrics(metrics, clf)

            print("\n{}\n{}\n{}\n".format("*"*60, ensemble_classifier, "*"*60))
            self.get_performance_metrics(metrics, self.estimator)
Example #32
    def fit(self, X, y):
        """ Fit ensemble classifers and the meta-classifier.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : object

        """
        self.clfs_ = [clone(clf) for clf in self.classifiers]
        self.meta_clf_ = clone(self.meta_classifier)
        if self.verbose > 0:
            print("Fitting %d classifiers..." % (len(self.classifiers)))

        for clf in self.clfs_:

            if self.verbose > 0:
                i = self.clfs_.index(clf) + 1
                print("Fitting classifier%d: %s (%d/%d)" %
                      (i, _name_estimators((clf, ))[0][0], i, len(self.clf_)))

            if self.verbose > 2:
                if hasattr(clf, 'verbose'):
                    clf.set_params(verbose=self.verbose - 2)

            if self.verbose > 1:
                print(_name_estimators((clf, ))[0][1])

            clf.fit(X, y)

        meta_features = self._predict_meta_features(X)
        self.meta_clf_.fit(meta_features, y)
        return self
 def __init__(self,
              classifiers,
              vote='probability',
              weights=None,
              method='majority_voting'):
     self.classifiers = classifiers
     self.named_classifiers = {
         k: v
         for k, v in _name_estimators(classifiers)
     }
     self.vote = vote
     self.weights = weights
     self.method = method
    def __init__(self,
                 classifiers,
                 weights=None,
                 fitting=None,
                 vote_method="majority_vote"):
        """
        Args :
            classifiers : list <classifier オブジェクト>
                分類器のクラスのオブジェクトのリスト
            weights : list <float>
                各分類器の対する重みの値のリスト : __init()__ の引数と同名のオブジェクトの属性
            fitting : list<bool>
                各分類器の対する学習を行うかのフラグのリスト
            vote_method : str ( "majority_vote" or "probability_vote" )
                アンサンブルによる最終的な判断判断手法 : __init()__ の引数と同名のオブジェクトの属性
                "majority_vote"    : 弱識別器の多数決で決定する.多数決方式 (=クラスラベルの argmax() 結果)
                "probability_vote" : 弱識別器の重み付け結果で決定する.(=クラスの所属確率の argmax() 結果)
        """
        self.classifiers = classifiers
        self.fitting = fitting
        self.fitted_classifiers = classifiers
        self.weights = weights
        self.n_classes = 0
        if classifiers is not None:
            self.n_classifier = len(classifiers)
        else:
            self.n_classifier = 0

        self.vote_method = vote_method
        self.encoder = LabelEncoder()

        # Names of the objects passed in classifiers
        if classifiers is not None:
            self.named_classifiers = {
                key: value
                for key, value in _name_estimators(classifiers)
            }
        else:
            self.named_classifiers = {}

        for i, named_classifier in enumerate(self.named_classifiers):
            print("name {} : {}".format(
                i, self.named_classifiers[named_classifier]))

        if fitting is None:
            # Default to fitting every classifier; assign to self.fitting so the
            # default actually takes effect (the original set only a local).
            self.fitting = [True] * len(self.classifiers)

        return
 def __init__(self, classifiers, vote='classlabel', weights=None):
     """
     Constructor
     """
     self.classifiers = classifiers
     self.named_classifiers = {
         key: value
         for key, value in _name_estimators(classifiers)
     }
     self.vote = vote
     self.weights = weights
     self.lablenc_ = LabelEncoder()
     self.classifiers_ = []
     self.classes_ = []
Example #36
 def __init__(
     self,
     estimator=None,
     output_dtype=float,
     output_dims=((None, np.nan),),
     fit_input="data",
     transform_input="data",
     estimator_name=None,
     model_path=None,
     features_dir=None,
     extension=".hdf5",
     save_func=None,
     load_func=None,
     dataset_map=None,
     input_dask_array=False,
     fit_kwargs=None,
     **kwargs,
 ):
     super().__init__(**kwargs)
     self.estimator = estimator
     self.output_dtype = output_dtype
     if not all(len(d) == 2 for d in output_dims):
         raise ValueError(
             "output_dims must be an iterable of size 2 tuples "
             f"(dim_name, dim_size), not {output_dims}"
         )
     self.output_dims = output_dims
     self.fit_input = fit_input
     self.transform_input = transform_input
     if estimator_name is None:
         estimator_name = _name_estimators([estimator])[0][0]
     self.estimator_name = estimator_name
     self.model_path = model_path
     self.features_dir = features_dir
     self.extension = extension
     estimator_save_fn = (
         None
         if estimator is None
         else estimator._get_tags().get("bob_features_save_fn")
     )
     estimator_load_fn = (
         None
         if estimator is None
         else estimator._get_tags().get("bob_features_load_fn")
     )
     self.save_func = save_func or estimator_save_fn or save
     self.load_func = load_func or estimator_load_fn or load
     self.dataset_map = dataset_map
     self.input_dask_array = input_dask_array
     self.fit_kwargs = fit_kwargs or {}
Example #37
def make_debug_pipeline(*steps, **kwargs):
    """Construct a DebugPipeline from the given estimators.
    This is a shorthand for the DebugPipeline constructor; it does not require, and
    does not permit, naming the estimators. Instead, their names will be set
    to the lowercase of their types automatically.
    Parameters
    ----------
    *steps : list of estimators.
    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the fitted transformers of the pipeline. By default,
        no caching is performed. If a string is given, it is the path to
        the caching directory. Enabling caching triggers a clone of
        the transformers before fitting. Therefore, the transformer
        instance given to the pipeline cannot be inspected
        directly. Use the attribute ``named_steps`` or ``steps`` to
        inspect estimators within the pipeline. Caching the
        transformers is advantageous when fitting is time consuming.
    verbose : boolean, default=False
        If True, the time elapsed while fitting each step will be printed as it
        is completed.
    log_callback: string, default=None.
        The callback function that logs information in between each
        intermediate step. Defaults to None. If set to `'default'`,
        :func:`default_log_callback` is used.

        See :func:`default_log_callback` for an example.

    See Also
    --------
    sklego.pipeline.DebugPipeline : Class for creating a pipeline of
        transforms with a final estimator.
    Examples
    --------
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.preprocessing import StandardScaler
    >>> make_debug_pipeline(StandardScaler(), GaussianNB(priors=None))
    DebugPipeline(steps=[('standardscaler', StandardScaler()),
                    ('gaussiannb', GaussianNB())])
    Returns
    -------
    p : DebugPipeline
    """
    memory = kwargs.pop('memory', None)
    verbose = kwargs.pop('verbose', False)
    log_callback = kwargs.pop('log_callback', None)
    if kwargs:
        raise TypeError('Unknown keyword arguments: "{}"'
                        .format(list(kwargs.keys())[0]))
    return DebugPipeline(_name_estimators(steps), memory=memory,
                         verbose=verbose, log_callback=log_callback)
def make_pipeline(*steps, **kwargs):
    """Construct a Pipeline from the given estimators.

    This is a shorthand for the Pipeline constructor; it does not require, and
    does not permit, naming the estimators. Instead, their names will be set
    to the lowercase of their types automatically.

    Parameters
    ----------
    *steps : list of estimators.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the fitted transformers of the pipeline. By default,
        no caching is performed. If a string is given, it is the path to
        the caching directory. Enabling caching triggers a clone of
        the transformers before fitting. Therefore, the transformer
        instance given to the pipeline cannot be inspected
        directly. Use the attribute ``named_steps`` or ``steps`` to
        inspect estimators within the pipeline. Caching the
        transformers is advantageous when fitting is time consuming.

    Returns
    -------
    p : Pipeline

    See also
    --------
    imblearn.pipeline.Pipeline : Class for creating a pipeline of
        transforms with a final estimator.

    Examples
    --------
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.preprocessing import StandardScaler
    >>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
    ...     # doctest: +NORMALIZE_WHITESPACE
    Pipeline(memory=None,
             steps=[('standardscaler',
                     StandardScaler(copy=True, with_mean=True, with_std=True)),
                    ('gaussiannb',
                     GaussianNB(priors=None, var_smoothing=1e-09))])
    """
    memory = kwargs.pop('memory', None)
    if kwargs:
        raise TypeError('Unknown keyword arguments: "{}"'
                        .format(list(kwargs.keys())[0]))
    return Pipeline(pipeline._name_estimators(steps), memory=memory)
Example #39
def make_sparkunion(*transformers):
    """Construct a FeatureUnion from the given transformers.
    This is a shorthand for the FeatureUnion constructor; it does not require,
    and does not permit, naming the transformers. Instead, they will be given
    names automatically based on their types. It also does not allow weighting.
    Examples
    --------
    >>> from sklearn.decomposition import PCA, TruncatedSVD
    >>> make_sparkunion(PCA(), TruncatedSVD())  # doctest: +NORMALIZE_WHITESPACE
    SparkFeatureUnion(n_jobs=1,
                 transformer_list=[('pca', PCA(copy=True, n_components=None,
                                               whiten=False)),
                                   ('truncatedsvd',
                                    TruncatedSVD(algorithm='randomized',
                                                 n_components=2, n_iter=5,
                                                 random_state=None, tol=0.0))],
                 transformer_weights=None)
    Returns
    -------
    f : SparkFeatureUnion
    """
    return SparkFeatureUnion(_name_estimators(transformers))
Example #40
    def __init__(self, clfs, voting="hard", weights=None):

        self.clfs = clfs
        self.named_clfs = {key: value for key, value in _name_estimators(clfs)}
        self.voting = voting
        self.weights = weights
    def transform(self, X, y=None):
        xform_data = self.transform_.transform(X, y)
        return np.append(X, xform_data, axis=1)


class LogExpPipeline(Pipeline):
    # Fit on log1p(y); invert with expm1 at prediction time.
    def fit(self, X, y):
        return super(LogExpPipeline, self).fit(X, np.log1p(y))

    def predict(self, X):
        return np.expm1(super(LogExpPipeline, self).predict(X))

#
# KNN model/pipeline with scaling
#
knn_pipe = LogExpPipeline(_name_estimators([RobustScaler(),
                                            KNeighborsRegressor(n_neighbors=15, metric='cityblock')]))
#
# SVM model/pipeline with scaling
#
svm_pipe = LogExpPipeline(_name_estimators([RobustScaler(),
                                            SVR(kernel='rbf', C=30, epsilon=0.05)]))

# results = cross_val_score(svm_pipe, train, y_train, cv=5, scoring='r2')
# print("SVM score: %.4f (%.4f)" % (results.mean(), results.std()))
# exit()

#
# ElasticNet model
#
en = ElasticNet(alpha=0.01, l1_ratio=0.9)

#
# XGBoost model
    def transform(self, X, y=None):
        xform_data = self.transform_.transform(X, y)
        return np.append(X, xform_data, axis=1)


class LogExpPipeline(Pipeline):
    # NOTE: despite the name, this classifier variant applies no log transform;
    # fit and predict simply delegate to Pipeline.
    def fit(self, X, y):
        return super(LogExpPipeline, self).fit(X, y)

    def predict(self, X):
        return super(LogExpPipeline, self).predict(X)

#
# KNN model/pipeline with scaling
#
knn_pipe = LogExpPipeline(_name_estimators([RobustScaler(),
                                            KNeighborsClassifier(n_neighbors=15, metric='cityblock')]))
#
# SVM model/pipeline with scaling
#
svm_pipe = LogExpPipeline(_name_estimators([RobustScaler(),
                                            SVC(kernel='rbf', C=14)]))

# results = cross_val_score(svm_pipe, train, y_train, cv=5, scoring='r2')
# print("SVM score: %.4f (%.4f)" % (results.mean(), results.std()))
# exit()


#
# XGBoost model
#
xgb_model = xgb.XGBClassifier(max_depth=4, learning_rate=0.0045, subsample=0.921, nthread=6,
                              objective='multi:softmax', n_estimators=500)
 def __init__(self, classifiers):
     self.classifiers = classifiers
     self.named_classifiers = {key: value for key, value in _name_estimators(classifiers)}
Example #44
def make_alpha_pipeline(*steps):
    """Construct an AlphaPipeline from the given estimators."""
    return AlphaPipeline(_name_estimators(steps))
def make_dataframe_pipeline(steps):
    """Construct a DataFramePipeline from the given estimators."""
    return DataFramePipeline(_name_estimators(steps))
Example #46
def make_transformer_pipeline(*steps):
    """Construct a TransformerPipeline from the given estimators."""
    return TransformerPipeline(_name_estimators(steps))
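
Finally, a short sketch of _name_estimators itself, the private scikit-learn helper every example above relies on: it lowercases each estimator's class name and, when a name occurs more than once, appends a numeric suffix:

from sklearn.pipeline import _name_estimators   # private helper; may move between versions
from sklearn.preprocessing import StandardScaler

print(_name_estimators([StandardScaler(), StandardScaler()]))
# [('standardscaler-1', StandardScaler()), ('standardscaler-2', StandardScaler())]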