def train(self):
        start = timeit.default_timer()
        train_x, train_y, feature_list = self.feature_extraction()
        self._feature_size = [train_x.shape[1], 1]
        self._features = feature_list

        if not self._silent:
            print "Train has %d instances " % (len(train_x))

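        # Flag the training data as imbalanced when any class is over- or
        # under-represented by more than a factor of six relative to a
        # uniform class distribution; the branches below then request
        # class_weight="balanced" where the estimator supports it.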
        counts = Counter(train_y)
        expectation_ratio = 1.0 / len(counts)
        n_samples = len(train_y)
        for value in counts.values():
            ratio = expectation_ratio / (float(value) / n_samples)
            if ratio > 6 or ratio < (1.0 / 6.0):
                self._data_balance = True
                break

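        # An optional per-instance weight column is forwarded to fit() as
        # sample_weight and removed from the feature matrix.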
        extra_fit_args = dict()
        if self._weight_col is not None:
            extra_fit_args['sample_weight'] = train_x[self._weight_col].values
            del train_x[self._weight_col]

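        # Optionally train on a random subsample: draw a fraction
        # self._bootstrap of the rows without replacement and reset the
        # pandas indices so positional lookups stay consistent.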
        if 0 < self._bootstrap < 1.0:
            if self._bootstrap_seed is not None:
                if not self._silent:
                    print "Setting bootstrap seed to %d" % self._bootstrap_seed
                np.random.seed(self._bootstrap_seed)
                random.seed(self._bootstrap_seed)
            bootstrap_len = int(math.floor(self._bootstrap * len(train_x)))
            bootstrap_ix = random.sample(range(len(train_x)), bootstrap_len)
            train_x = train_x.iloc[bootstrap_ix].reset_index(drop=True)
            train_y = train_y.iloc[bootstrap_ix].reset_index(drop=True)

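        # Dispatch on the configured model type. Each branch fits a model,
        # stores it on self._model, and installs self._predict (and, where
        # supported, self._staged_predict) as callables taking a single
        # (fitted_model, pred_x) tuple. The "fit_more" / "pred_at" paths
        # assume `model` has been populated by a loader elsewhere; within
        # this method it is always None, so a fresh fit is performed.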
        model = None

        if self._model_type == "RandomForestRegressor":
            if model is None:
                # class_weight is a classifier-only argument in sklearn, so
                # the data-balance flag is ignored for regressors.
                model = RandomForestRegressor(**self._fit_args)
                model.fit(X=train_x, y=train_y, **extra_fit_args)
                self._model = model
                self._predict = (lambda (fitted_model, pred_x):
                                 self.continuous_predict(x=pred_x))
                self._have_feat_importance = True

        elif self._model_type == "RandomForestClassifier":
            if model is None:
                # if self._data_balance is True:
                #     self._fit_args.update({"class_weight": "balanced"})
                model = RandomForestClassifier(**self._fit_args)
                model.fit(X=train_x, y=train_y, **extra_fit_args)
                self._model = model
                self._predict = (lambda (fitted_model, pred_x):
                                 self.pred_proba(x=pred_x))
            self._staged_predict = (lambda (fitted_model, pred_x):
                                    [self._predict((fitted_model, pred_x))])
            self._have_feat_importance = True

        elif self._model_type == "ExtraTreesRegressor":
            if model is None:
                # class_weight is a classifier-only argument in sklearn, so
                # the data-balance flag is ignored for regressors.
                model = ExtraTreesRegressor(**self._fit_args)
                model.fit(X=train_x, y=train_y, **extra_fit_args)
                self._model = model
                self._predict = (lambda (fitted_model, pred_x):
                                 self.continuous_predict(x=pred_x))
                self._have_feat_importance = True

        elif self._model_type == "ExtraTreesClassifier":
            if model is None:
                if self._data_balance is True:
                    self._fit_args.update({"class_weight": "balanced"})
                model = ExtraTreesClassifier(**self._fit_args)
                model.fit(X=train_x, y=train_y, **extra_fit_args)
                self._model = model
            self._predict = (lambda (fitted_model, pred_x):
                             self.pred_proba(x=pred_x))
            self._staged_predict = (lambda (fitted_model, pred_x):
                                    [self._predict((fitted_model, pred_x))])
            self._have_feat_importance = True

        elif self._model_type == "GradientBoostingRegressor":
            if model is None:
                model = GradientBoostingRegressor(**self._fit_args)
                model.fit(X=train_x, y=train_y, **extra_fit_args)
                self._model = model
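            # "fit_more": continue training a previously loaded model by
            # enabling warm_start and growing n_estimators additional trees.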
            elif self._load_type == "fit_more":
                model.warm_start = True
                model.n_estimators += self._fit_args['n_estimators']
                model.fit(X=train_x, y=train_y)
                self._model = model
            self._predict = (lambda (fitted_model, pred_x):
                             self.continuous_predict(x=pred_x))
            self._staged_predict = (lambda (fitted_model, pred_x):
                                    self.staged_pred_continuous(x=pred_x))
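            # "pred_at": when the loaded model has more trees than requested,
            # predict from the staged outputs truncated at n_estimators.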
            if self._load_type == "pred_at" and self._fit_args[
                    'n_estimators'] < model.n_estimators:
                if not self._silent:
                    print("Predict using %d trees" %
                          self._fit_args['n_estimators'])
                self._predict = lambda (
                    fitted_model, pred_x): self.staged_pred_continuous_at_n(
                        x=pred_x, n=self._fit_args['n_estimators'])
        elif self._model_type == "GradientBoostingClassifier":
            if model is None:
                model = GradientBoostingClassifier(**self._fit_args)
                model.fit(X=train_x, y=train_y, **extra_fit_args)
                self._model = model
            elif self._load_type == "fit_more":
                model.warm_start = True
                model.n_estimators += self._fit_args['n_estimators']
                model.fit(X=train_x, y=train_y)
                self._model = model
            self._predict = (lambda (fitted_model, pred_x):
                             self.pred_proba(x=pred_x))
            self._staged_predict = (lambda (fitted_model, pred_x):
                                    self.staged_pred_proba(x=pred_x))
            if self._load_type == "pred_at" and \
                    self._fit_args['n_estimators'] < model.n_estimators:
                if not self._silent:
                    print("Predict using %d trees" %
                          self._fit_args['n_estimators'])
                self._predict = (lambda (fitted_model, pred_x):
                                 self.staged_pred_proba_at_n(
                                     x=pred_x,
                                     n=self._fit_args['n_estimators']))
        elif self._model_type == "LogisticRegression":
            if model is None:
                if self._data_balance is True:
                    self._fit_args.update({"class_weight": "balanced"})
                model = LogisticRegression(**self._fit_args)
                model.fit(X=train_x, y=train_y)
                self._model = model
            self._predict = (lambda (fitted_model, pred_x):
                             self.pred_proba(x=pred_x))
            self._staged_predict = (lambda (fitted_model, pred_x):
                                    [self._predict((fitted_model, pred_x))])

        elif self._model_type == "SVC":
            if model is None:
                if self._data_balance is True:
                    self._fit_args.update({"class_weight": "balanced"})
                model = sklearn.svm.SVC(**self._fit_args)
                model.fit(X=train_x, y=train_y)
                self._model = model
            self._predict = (lambda (fitted_model, pred_x):
                             self.pred_proba(x=pred_x))
            self._staged_predict = (lambda (fitted_model, pred_x):
                                    [self._predict((fitted_model, pred_x))])
        elif self._model_type == "CNN":
            if model is None:
                train_data = load_pd_df(self._input_dir + '/train.csv')
                indices, max_len = self.process_date_list(
                    train_data['Date'].map(
                        lambda x: datetime.datetime.strptime(x, '%Y-%m-%d')))
                self._feature_size = [train_x.shape[1], max_len]

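                # Two Conv2D + max-pooling stages followed by a dense layer;
                # the first convolution spans all features at once, so the
                # network pools only along the time axis.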
                NB_FILTER = [64, 128]
                NB_SIZE = [4, 3, 3]
                FULLY_CONNECTED_UNIT = 256
                model = Sequential()
                # Keras 2 arguments: 'padding' and 'kernel_constraint'
                # replace the Keras 1 'border_mode' and 'W_constraint'.
                # input_shape is assumed to be the per-sample shape
                # (features, timesteps, 1); train_x.shape would wrongly
                # include the batch dimension.
                model.add(
                    Conv2D(NB_FILTER[0], (train_x.shape[1], NB_SIZE[0]),
                           input_shape=(self._feature_size[0],
                                        self._feature_size[1], 1),
                           padding='valid',
                           activation='relu'))
                model.add(MaxPooling2D(pool_size=(1, 3)))
                model.add(
                    Conv2D(NB_FILTER[1], (1, NB_SIZE[1]), padding='valid'))
                model.add(MaxPooling2D(pool_size=(1, 3)))
                model.add(Flatten())
                model.add(
                    Dense(FULLY_CONNECTED_UNIT,
                          activation='relu',
                          kernel_constraint=maxnorm(3),
                          kernel_regularizer=regularizers.l2(0.01)))
                model.add(Dense(2, activation='softmax'))
                model.compile(loss='categorical_crossentropy',
                              optimizer=Adamax(),
                              metrics=['accuracy'])
                model.fit(train_x,
                          train_y,
                          batch_size=16,
                          epochs=50,
                          verbose=1)
                self._model = model
        elif self._model_type == "LSTM":
            if model is None:
                train_data = load_pd_df(self._input_dir + '/train.csv')
                indices, max_len = self.process_date_list(
                    train_data['Date'].map(
                        lambda x: datetime.datetime.strptime(x, '%Y-%m-%d')))
                self._feature_size = [train_x.shape[1], max_len]

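                # Inverse-frequency class weights, equivalent to sklearn's
                # "balanced" heuristic: n_samples / (n_classes * count_c).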
                class_weight = {
                    0: float(n_samples) / (len(counts) * counts[0]),
                    1: float(n_samples) / (len(counts) * counts[1]),
                }
                model = CNN_LSTM(
                    (self._feature_size[0], 4),
                    (None, self._feature_size[0], self._feature_size[1], 1))
                model.fit_generator(
                    generator=self.generator(train_x, train_y, indices,
                                             max_len),
                    epochs=20,
                    class_weight=class_weight,
                    steps_per_epoch=train_x.shape[0] // self._batch_size)
                self._model = model

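        # Build a two-stage sklearn Pipeline from the configuration: a
        # pre-processing step followed by the final estimator, both resolved
        # by name through get_class.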
        elif self._model_type == "Pipeline":
            if model is None:
                # The 'args' entries are assumed to be dicts of keyword
                # arguments for the classes resolved by get_class.
                model = Pipeline([
                    ('pre_process',
                     get_class(self._fit_args['pre_process']['name'])(
                         **self._fit_args['pre_process']['args'])),
                    ('model',
                     get_class(self._fit_args['model']['name'])(
                         **self._fit_args['model']['args']))
                ])
                model.fit(X=train_x, y=train_y)
                self._model = model
            self._predict = (lambda (fitted_model, pred_x):
                             self.pred_proba(x=pred_x))
            self._staged_predict = (lambda (fitted_model, pred_x):
                                    [self._predict((fitted_model, pred_x))])

        if not self._silent:
            stop = timeit.default_timer()
            print "Train time: %d s" % (stop - start)

        del train_x, train_y