def test_getattr(client):
    """Exercise attribute lookup on distributed estimators.

    Covers three resolution paths: a plain local attribute
    (``client``), a fitted attribute held by a local (non-distributed)
    model, and a fitted attribute held by a distributed model.
    """
    # Local param: readable before fit; unfitted attrs raise.
    kmeans_model = KMeans(client=client)
    with pytest.raises(AttributeError):
        kmeans_model.cluster_centers_
    assert kmeans_model.client is not None

    # getattr via the local_model of a non-distributed model.
    X, y = make_blobs(n_samples=5,
                      n_features=5,
                      centers=2,
                      n_parts=2,
                      cluster_std=0.01,
                      random_state=10)
    kmeans_model.fit(X)
    centers = kmeans_model.cluster_centers_
    assert centers is not None
    assert isinstance(centers, cupy.ndarray)

    # getattr on a trained distributed model.
    X, y = load_text_corpus(client)
    nb_model = MultinomialNB(client=client)
    nb_model.fit(X, y)
    counts = nb_model.feature_count_
    assert counts is not None
    assert isinstance(counts, cupy.ndarray)
def fit(self, X_train, y_train):
    """Fit the model selected by ``self.hpo_config.model_type``.

    Parameters
    ----------
    X_train : training features (dask collection for XGBoost).
    y_train : training labels; cast to int32 for RandomForest,
        unused by KMeans.

    Returns
    -------
    The trained model: an XGBoost booster, a fitted
    RandomForestClassifier, or a fitted KMeans estimator.

    Raises
    ------
    ValueError
        If ``model_type`` names no supported model. (Previously this
        fell through and raised ``UnboundLocalError`` at the return.)
    """
    if 'XGBoost' in self.hpo_config.model_type:
        hpo_log.info('> fit xgboost model')
        dtrain = xgboost.dask.DaskDMatrix(self.client, X_train, y_train)
        num_boost_round = self.hpo_config.model_params['num_boost_round']
        xgboost_output = xgboost.dask.train(
            self.client,
            self.hpo_config.model_params,
            dtrain,
            num_boost_round=num_boost_round)
        trained_model = xgboost_output['booster']
    elif 'RandomForest' in self.hpo_config.model_type:
        hpo_log.info('> fit randomforest model')
        trained_model = RandomForestClassifier(
            n_estimators=self.hpo_config.model_params['n_estimators'],
            max_depth=self.hpo_config.model_params['max_depth'],
            max_features=self.hpo_config.model_params['max_features'],
            n_bins=self.hpo_config.model_params['n_bins']).fit(
                X_train, y_train.astype('int32'))
    elif 'KMeans' in self.hpo_config.model_type:
        hpo_log.info('> fit kmeans model')
        trained_model = KMeans(
            n_clusters=self.hpo_config.model_params['n_clusters'],
            max_iter=self.hpo_config.model_params['max_iter'],
            random_state=self.hpo_config.model_params['random_state'],
            init=self.hpo_config.model_params['init']).fit(X_train)
    else:
        raise ValueError(
            f'Unsupported model_type: {self.hpo_config.model_type}')

    return trained_model
def test_getattr(cluster):
    """Exercise attribute lookup on distributed estimators.

    Verifies a local attribute (``client``), a fitted attribute of a
    non-distributed model, and a fitted attribute of a distributed
    model. The client is always closed, even on assertion failure.
    """
    client = Client(cluster)
    try:
        # Test getattr on local param.
        kmeans_model = KMeans(client=client)
        assert kmeans_model.client is not None

        # Test getattr on local_model param with a non-distributed model.
        X, y = make_blobs(n_samples=5,
                          n_features=5,
                          centers=2,
                          n_parts=2,
                          cluster_std=0.01,
                          random_state=10)
        wait(X)
        kmeans_model.fit(X)
        assert kmeans_model.cluster_centers_ is not None
        # cupy.core was privatized; cupy.ndarray is the public name.
        assert isinstance(kmeans_model.cluster_centers_, cupy.ndarray)

        # Test getattr on trained distributed model.
        X, y = load_text_corpus(client)
        nb_model = MultinomialNB(client=client)
        nb_model.fit(X, y)
        assert nb_model.feature_count_ is not None
        assert isinstance(nb_model.feature_count_, cupy.ndarray)
    finally:
        client.close()
def test_getattr(cluster):
    """Exercise attribute lookup on distributed estimators.

    Verifies a local attribute (``client``), a fitted attribute of a
    non-distributed model (cuDF centers), and a fitted attribute of a
    distributed model. The client is now closed in a ``finally`` block
    so a failing assertion no longer leaks the connection.
    """
    client = Client(cluster)
    try:
        # Test getattr on local param.
        kmeans_model = KMeans(client=client)
        assert kmeans_model.client is not None

        # Test getattr on local_model param with a non-distributed model.
        X_cudf, y = make_blobs(5, 5, 2, 2,
                               cluster_std=0.01,
                               verbose=False,
                               random_state=10)
        wait(X_cudf)
        kmeans_model.fit(X_cudf)
        assert kmeans_model.cluster_centers_ is not None
        assert isinstance(kmeans_model.cluster_centers_, cudf.DataFrame)

        # Test getattr on trained distributed model.
        X, y = load_text_corpus(client)
        nb_model = MultinomialNB(client=client)
        nb_model.fit(X, y)
        assert nb_model.feature_count_ is not None
        # cupy.core was privatized; cupy.ndarray is the public name.
        assert isinstance(nb_model.feature_count_, cupy.ndarray)
    finally:
        client.close()