def train_cuml_classifier(data, labels, depth=25, trees=100, random_state=0):
    """Fit a ``cuRFC`` random-forest classifier on the given data.

    Args:
        data: Training features, handed unchanged to ``fit``.
        labels: Training targets, handed unchanged to ``fit``.
        depth: Forwarded to the estimator as ``max_depth``.
        trees: Forwarded to the estimator as ``n_estimators``.
        random_state: Seed forwarded to the estimator. Defaults to 0, the
            value that used to be hard-coded, so existing callers see the
            same behaviour.

    Returns:
        Whatever ``cuRFC.fit`` returns (presumably the fitted estimator,
        following the scikit-learn convention -- confirm against the cuML
        version in use).
    """
    model = cuRFC(
        max_depth=depth,
        n_estimators=trees,
        random_state=random_state,
    )
    return model.fit(data, labels)
def _construct_rf(n_estimators, random_state, **kwargs):
    """Build a ``cuRFC`` instance from the given settings.

    Args:
        n_estimators: Forwarded as the ``n_estimators`` keyword.
        random_state: Forwarded as the ``random_state`` keyword.
        **kwargs: Any further keyword arguments, passed through untouched.

    Returns:
        The newly constructed ``cuRFC`` object (no fitting is done here).
    """
    # kwargs can never contain the two named parameters (Python binds them
    # to the explicit arguments), so merging into one dict is equivalent to
    # passing them side by side.
    forest_options = {
        "n_estimators": n_estimators,
        "random_state": random_state,
        **kwargs,
    }
    return cuRFC(**forest_options)
def RandomForest_train(self, n_estimators):
    """Train one random-forest classifier per scored target and predict on X_val.

    For every name in ``self.targets_scored_col_name`` a fresh ``cuRFC`` is
    fitted on ``self.X_train[self.features]`` against that target column of
    ``self.y_train``, and column 1 of its ``predict_proba`` output on
    ``self.X_val[self.features]`` is stored in the matching column of
    ``self.y_pred`` (presumably the positive-class probability -- confirm
    the class ordering). Targets whose training column sums to less than 5
    are not modelled; their prediction column is left at zero.

    Args:
        n_estimators: Number of trees passed to each ``cuRFC``.

    Side effects:
        Sets ``self.y_pred`` to an array of shape
        ``(self.X_val.shape[0], len(self.targets_scored_col_name))`` and
        leaves ``self.model`` pointing at the last classifier that was
        fitted (it is not assigned when every target is skipped).
    """
    target_names = self.targets_scored_col_name
    n_targets = len(target_names)
    self.y_pred = np.zeros((self.X_val.shape[0], n_targets))

    for col_idx in tqdm(range(n_targets)):
        target_name = target_names[col_idx]

        # Too few positive samples for this label: keep an all-zero column.
        positives = self.y_train[target_name].values.sum()
        if positives < 5:
            self.y_pred[:, col_idx] = np.zeros(len(self.X_val))
            continue

        self.model = cuRFC(
            n_estimators=n_estimators,  # number of trees in the forest
            max_depth=32,  # maximum tree depth
            max_features='auto',  # features considered per node split
        )
        self.model.fit(
            self.X_train[self.features],
            self.y_train[target_name],
        )

        # Bring the probabilities back to host memory and keep column 1.
        val_proba = self.model.predict_proba(self.X_val[self.features])
        host_proba = cupy.asnumpy(val_proba.values)
        self.y_pred[:, col_idx] = host_proba[:, 1]
def _func_build_rf(
    n,
    n_estimators,
    max_depth,
    handle,
    max_features,
    n_bins,
    split_algo,
    split_criterion,
    min_rows_per_node,
    bootstrap,
    bootstrap_features,
    type_model,
    verbose,
    rows_sample,
    max_leaves,
    n_streams,
    quantile_per_tree,
    dtype,
    r,
):
    """Construct a ``cuRFC`` from an explicit list of hyper-parameters.

    Every argument except ``n``, ``r`` and ``dtype`` is forwarded to
    ``cuRFC`` under a keyword of the same name. ``dtype`` is forwarded as
    ``gdf_datatype``.

    ``n`` and ``r`` are accepted but never read in this function.
    NOTE(review): they look like placeholders that only exist to vary the
    call signature (e.g. a worker index and a random token) -- confirm
    against the caller before removing them.

    Returns:
        The newly constructed ``cuRFC`` object (no fitting is done here).
    """
    rf_options = {
        "n_estimators": n_estimators,
        "max_depth": max_depth,
        "handle": handle,
        "max_features": max_features,
        "n_bins": n_bins,
        "split_algo": split_algo,
        "split_criterion": split_criterion,
        "min_rows_per_node": min_rows_per_node,
        "bootstrap": bootstrap,
        "bootstrap_features": bootstrap_features,
        "type_model": type_model,
        "verbose": verbose,
        "rows_sample": rows_sample,
        "max_leaves": max_leaves,
        "n_streams": n_streams,
        "quantile_per_tree": quantile_per_tree,
        # The estimator expects the dtype under a different keyword name.
        "gdf_datatype": dtype,
    }
    return cuRFC(**rf_options)
def _construct_rf(n_estimators, seed, **kwargs):
    """Build a ``cuRFC`` instance with the given tree count and seed.

    Args:
        n_estimators: Forwarded as the ``n_estimators`` keyword.
        seed: Forwarded as the ``seed`` keyword.
        **kwargs: Any further keyword arguments, passed through untouched.

    Returns:
        The newly constructed ``cuRFC`` object (no fitting is done here).
    """
    # The named parameters cannot also appear in kwargs, so folding them
    # into a single mapping is equivalent to passing them separately.
    settings = dict(kwargs, n_estimators=n_estimators, seed=seed)
    return cuRFC(**settings)