def __init__(self,
             n_estimators=10,
             criterion='mse',
             max_depth=None,
             min_samples_split=2,
             min_samples_leaf=1,
             max_features='auto',
             max_leaf_nodes=None,
             bootstrap=True,
             oob_score=False,
             n_jobs=1,
             random_state=None,
             verbose=0,
             min_density=None,
             compute_importances=None):
    """Initialise the estimator by delegating to ``RandomForestRegressor``.

    Every argument except ``min_density`` is forwarded by keyword to
    ``RandomForestRegressor.__init__``. ``min_density`` is accepted in the
    signature but is not passed on.
    """
    # Collect the forwarded options in one place so the delegation below
    # stays a single short call.
    forwarded = dict(
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        bootstrap=bootstrap,
        compute_importances=compute_importances,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
    )
    RandomForestRegressor.__init__(self, **forwarded)
Beispiel #2
0
    def __init__(self,
                 n_estimators=10,
                 criterion="mse",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features="auto",
                 max_leaf_nodes=None,
                 min_impurity_decrease=0.,
                 min_impurity_split=None,
                 bootstrap=True,
                 oob_score=False,
                 n_jobs=1,
                 random_state=None,
                 verbose=0,
                 warm_start=False):
        """Initialise the wrapped random forest regressor.

        All arguments are forwarded by keyword to
        ``_RandomForestRegressor.__init__`` after two adjustments:
        ``n_estimators`` is coerced with ``int()`` and ``n_jobs`` is
        replaced by the fixed value 4. ``BaseWrapperReg.__init__`` is then
        called with no extra arguments.
        """
        # NOTE(review): the caller-supplied ``n_jobs`` is discarded and 4 is
        # always used. Kept as-is to preserve existing behaviour -- confirm
        # whether this override is intentional.
        n_jobs = 4
        n_estimators = int(n_estimators)

        # Forward by keyword rather than by position: a 16-argument
        # positional call silently mis-assigns values if the parent's
        # parameter order differs, and scikit-learn releases that make these
        # parameters keyword-only reject it.
        _RandomForestRegressor.__init__(
            self,
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start)
        BaseWrapperReg.__init__(self)
Beispiel #3
0
    def __init__(self,
                 n_estimators=10,
                 random_state=1,
                 X_train=None,
                 Y_train=None,
                 data=None):
        """Initialise the forest, keep the training data and ensure ``./model`` exists.

        Args:
            n_estimators (int, optional): Number of decision trees.
                Defaults to 10.
            random_state (int, optional): Seed for the random number
                generator. Defaults to 1.
            X_train (df, optional): Flattened input data.
            Y_train (df, optional): Output data.
            data (df, optional): Non-flattened input data.
        """
        RandomForestRegressor.__init__(self,
                                       n_estimators=n_estimators,
                                       random_state=random_state)
        self.X_train = X_train
        self.Y_train = Y_train
        self.data = data
        # ``exist_ok=True`` avoids the check-then-create race of
        # ``os.path.exists`` + ``os.makedirs`` when several instances are
        # constructed concurrently.
        os.makedirs("./model", exist_ok=True)
	def __init__(
		self,
		n_estimators=10,
		criterion='mse',
		max_depth=None,
		min_samples_split=2,
		min_samples_leaf=1,
		max_features='auto',
		max_leaf_nodes=None,
		bootstrap=True,
		oob_score=False,
		n_jobs=1,
		random_state=None,
		verbose=0,
		min_density=None,
		compute_importances=None
	):
		"""Initialise the estimator by delegating to ``RandomForestRegressor``.

		Every argument except ``min_density`` is forwarded by keyword;
		``min_density`` is accepted but not passed on.
		"""
		RandomForestRegressor.__init__(
			self,
			n_estimators=n_estimators,
			criterion=criterion,
			max_depth=max_depth,
			min_samples_split=min_samples_split,
			min_samples_leaf=min_samples_leaf,
			max_features=max_features,
			max_leaf_nodes=max_leaf_nodes,
			bootstrap=bootstrap,
			compute_importances=compute_importances,
			oob_score=oob_score,
			n_jobs=n_jobs,
			random_state=random_state,
			verbose=verbose
		)
 def __init__(self, k=1000, var_threshold=0, **kwargs):
     """Set up the forest together with a univariate feature filter.

     ``k`` is stored and also given to ``SelectKBest`` (scored with
     ``f_regression``), ``var_threshold`` is stored unchanged, and any
     remaining keyword arguments go to ``RandomForestRegressor.__init__``.
     """
     self.k = k
     RandomForestRegressor.__init__(self, **kwargs)
     # Build the selector first, then attach both remaining attributes.
     selector = SelectKBest(score_func=f_regression, k=k)
     self.var_threshold = var_threshold
     self.filter = selector
 def __init__(self, reserved_columns=None, **kwargs):
     """Initialise the forest and record which columns are reserved.

     Args:
         reserved_columns: Value stored on ``self.reserved_columns``.
             When omitted (or ``None``), a new empty list is used.
         **kwargs: Forwarded unchanged to
             ``RandomForestRegressor.__init__``.
     """
     # A literal ``[]`` default would be one list object shared by every
     # instance constructed without the argument; create a fresh list per
     # instance instead.
     self.reserved_columns = [] if reserved_columns is None else reserved_columns
     RandomForestRegressor.__init__(self, **kwargs)
     self.feature_names = []
Beispiel #7
0
    def __init__(self, pmml, n_jobs=None):
        """Build a random forest regressor from the segments of a PMML MiningModel.

        Args:
            pmml: Forwarded unchanged to ``PMMLBaseRegressor.__init__``.
            n_jobs: Forwarded to ``RandomForestRegressor.__init__``.

        Raises:
            Exception: If the document has no ``MiningModel`` element, the
                mining model has no ``Segmentation`` element, or the
                segmentation's ``multipleModelMethod`` is neither
                ``'majorityVote'`` nor ``'average'``.
        """
        # NOTE(review): self.root, self.fields, self.target_field and
        # self.field_mapping are not assigned in this method -- presumably
        # they are set up by PMMLBaseRegressor.__init__; confirm.
        PMMLBaseRegressor.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        if segmentation.get('multipleModelMethod') not in [
                'majorityVote', 'average'
        ]:
            raise Exception(
                'PMML model ensemble should use majority vote or average.')

        # Parse segments
        # Only segments that have a 'True' child element are kept; the rest
        # are dropped and reported through a single warning below.
        segments = segmentation.findall('Segment')
        valid_segments = [
            segment for segment in segments if segment.find('True') is not None
        ]

        if len(valid_segments) < len(segments):
            warnings.warn(
                'Warning: {} segment(s) ignored because of unsupported predicate.'
                .format(len(segments) - len(valid_segments)))

        # One estimator per retained segment.
        n_estimators = len(valid_segments)
        self.n_outputs_ = 1
        RandomForestRegressor.__init__(self,
                                       n_estimators=n_estimators,
                                       n_jobs=n_jobs)
        self._validate_estimator()

        # Create one estimator (not appended to the ensemble) and keep it as
        # a template on self.template_estimator.
        clf = self._make_estimator(append=False, random_state=123)
        # Prefer n_features_in_; fall back to n_features_ when an
        # AttributeError is raised (presumably for compatibility across
        # scikit-learn versions -- confirm).
        try:
            clf.n_features_in_ = self.n_features_in_
        except AttributeError:
            clf.n_features_ = self.n_features_
        clf.n_outputs_ = self.n_outputs_
        self.template_estimator = clf

        # get_tree is defined outside this block; the meaning of
        # rescale_factor=0.1 is not visible here.
        self.estimators_ = [
            get_tree(self, s, rescale_factor=0.1) for s in valid_segments
        ]

        # Required after constructing trees, because categories may be inferred in
        # the parsing process
        target = self.target_field.get('name')
        fields = [
            field for name, field in self.fields.items() if name != target
        ]
        # For every non-target DataField: the number of categories when the
        # field's optype is 'categorical', otherwise -1.
        # NOTE(review): the array is rebuilt from the same inputs on every
        # iteration, so each estimator receives its own array object.
        for clf in self.estimators_:
            n_categories = np.asarray([
                len(self.field_mapping[field.get('name')][1].categories)
                if field.get('optype') == 'categorical' else -1
                for field in fields if field.tag == 'DataField'
            ],
                                      dtype=np.int32,
                                      order='C')
            clf.n_categories = n_categories
            clf.tree_.set_n_categories(n_categories)

        # Boolean mask: True where the first estimator's entry is not -1.
        # NOTE(review): indexes estimators_[0], so this raises IndexError
        # when no segment was retained.
        self.categorical = [x != -1 for x in self.estimators_[0].n_categories]
Beispiel #8
0
 def __init__(self, **kwargs):
     """Initialise the forest and the empty per-tree leaf statistics.

     All keyword arguments are forwarded to
     ``RandomForestRegressor.__init__``; ``tree_means_per_leaf`` and
     ``tree_vars_per_leaf`` start out as two separate empty lists.
     """
     RandomForestRegressor.__init__(self, **kwargs)
     self.tree_means_per_leaf, self.tree_vars_per_leaf = [], []
 def __init__(self):
     """Initialise both bases from the ``RandomForest`` class-level settings.

     The forest is created with ``RandomForest.n_est`` estimators and
     ``warm_start=True``; the scorer is then initialised with
     ``batch_size=RandomForest.batch``.
     """
     forest_options = {
         'n_estimators': RandomForest.n_est,
         'warm_start': True,
     }
     RandomForestRegressor.__init__(self, **forest_options)
     OnlineScorer.__init__(self, batch_size=RandomForest.batch)
    def __init__(self, *args, **kargs):
        """Initialise the forest and allocate a device buffer for the weight.

        All positional and keyword arguments are forwarded to
        ``RandomForestRegressor.__init__``. ``self.weight`` is a one-element
        float32 array holding 1, and ``self.weight_gpu`` is whatever
        ``cuda.mem_alloc`` returns for that array's byte size.
        """
        RandomForestRegressor.__init__(self, *args, **kargs)
        unit_weight = np.array([1], dtype=np.float32)
        self.weight = unit_weight
        # NOTE(review): `cuda` is imported outside this block -- presumably
        # pycuda.driver; confirm.
        self.weight_gpu = cuda.mem_alloc(unit_weight.nbytes)
train_data = munge_rest(train_df)
test_data = munge_rest(test_df)

# Column 37 holds the revenue target: drop it to obtain the feature matrix
# and keep it separately as the regression target.
x = np.delete(train_data, 37, 1)
revenue = train_data[:, 37]

# TRAINING
# Create the random forest object with all the parameters for the fit.
# ``oob_score`` is passed here rather than through a later
# ``forest.__init__(oob_score=True)`` call: re-running ``__init__`` resets
# every other parameter (n_estimators, max_features, min_samples_split) to
# its default value.
forest = RandomForestRegressor(n_estimators=100,
                               max_depth=None,
                               max_features='sqrt',
                               min_samples_split=3,
                               oob_score=True)
# Fit the training data to the revenue and create the decision trees
forest = forest.fit(x, revenue)

# Try the Ridge Regression model
clf = linear_model.Ridge(alpha=0.75,
                         fit_intercept=True,
                         normalize=True,
                         copy_X=True,
                         max_iter=1000,
                         tol=0.015)
# Fit the training data to the revenue and create the Ridge regression model
Ridge = clf.fit(x, revenue)

# After fitting with ``oob_score=True`` the out-of-bag estimate is available
# as ``forest.oob_score_``; ``forest.score(x, revenue)`` would instead report
# R^2 on the training data itself.