Example #1
def test_getattr(client):

    # Test getattr on local param
    kmeans_model = KMeans(client=client)

    # Test AttributeError
    with pytest.raises(AttributeError):
        kmeans_model.cluster_centers_

    assert kmeans_model.client is not None

    # Test getattr on local_model param with a non-distributed model

    X, y = make_blobs(n_samples=5,
                      n_features=5,
                      centers=2,
                      n_parts=2,
                      cluster_std=0.01,
                      random_state=10)

    kmeans_model.fit(X)

    assert kmeans_model.cluster_centers_ is not None
    assert isinstance(kmeans_model.cluster_centers_, cupy.ndarray)

    # Test getattr on trained distributed model

    X, y = load_text_corpus(client)

    nb_model = MultinomialNB(client=client)
    nb_model.fit(X, y)

    assert nb_model.feature_count_ is not None
    assert isinstance(nb_model.feature_count_, cupy.ndarray)
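
The snippet above omits its imports and the client fixture it depends on. A minimal sketch of that scaffolding, assuming the cuml.dask estimator paths and a single-node dask_cuda cluster; the fixture body and cluster settings are illustrative, and load_text_corpus is a helper from the surrounding test suite that is not reproduced here:

import pytest
import cupy
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from cuml.dask.cluster import KMeans
from cuml.dask.naive_bayes import MultinomialNB
from cuml.dask.datasets import make_blobs


@pytest.fixture
def client():
    # Single-GPU Dask cluster, torn down after each test (illustrative settings).
    cluster = LocalCUDACluster()
    client = Client(cluster)
    yield client
    client.close()
    cluster.close()
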
Example #2
    def fit(self, X_train, y_train):
        """Fit the model selected by hpo_config: XGBoost, RandomForest, or KMeans."""
        if 'XGBoost' in self.hpo_config.model_type:
            hpo_log.info('> fit xgboost model')
            dtrain = xgboost.dask.DaskDMatrix(self.client, X_train, y_train)
            num_boost_round = self.hpo_config.model_params['num_boost_round']

            xgboost_output = xgboost.dask.train(
                self.client,
                self.hpo_config.model_params,
                dtrain,
                num_boost_round=num_boost_round)

            trained_model = xgboost_output['booster']

        elif 'RandomForest' in self.hpo_config.model_type:
            hpo_log.info('> fit randomforest model')
            trained_model = RandomForestClassifier(
                n_estimators=self.hpo_config.model_params['n_estimators'],
                max_depth=self.hpo_config.model_params['max_depth'],
                max_features=self.hpo_config.model_params['max_features'],
                n_bins=self.hpo_config.model_params['n_bins']).fit(
                    X_train, y_train.astype('int32'))

        elif 'KMeans' in self.hpo_config.model_type:
            hpo_log.info('> fit kmeans model')
            trained_model = KMeans(
                n_clusters=self.hpo_config.model_params['n_clusters'],
                max_iter=self.hpo_config.model_params['max_iter'],
                random_state=self.hpo_config.model_params['random_state'],
                init=self.hpo_config.model_params['init']).fit(X_train)

        return trained_model
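
fit() hands back either a Booster trained through xgboost.dask or a fitted cuML estimator, so inference has to branch the same way. A sketch of a hypothetical companion predict method under that assumption (not part of the original class):

    def predict(self, trained_model, X_test):
        """Illustrative only: score with the object returned by fit()."""
        if 'XGBoost' in self.hpo_config.model_type:
            # Dask-trained boosters are scored through the xgboost.dask interface.
            d_test = xgboost.dask.DaskDMatrix(self.client, X_test)
            return xgboost.dask.predict(self.client, trained_model, d_test)
        # cuML estimators (RandomForest, KMeans) expose a regular predict().
        return trained_model.predict(X_test)
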
Example #3
def test_getattr(cluster):

    client = Client(cluster)

    try:
        # Test getattr on local param
        kmeans_model = KMeans(client=client)

        assert kmeans_model.client is not None

        # Test getattr on local_model param with a non-distributed model

        X, y = make_blobs(n_samples=5,
                          n_features=5,
                          centers=2,
                          n_parts=2,
                          cluster_std=0.01,
                          random_state=10)

        wait(X)

        kmeans_model.fit(X)

        assert kmeans_model.cluster_centers_ is not None
        assert isinstance(kmeans_model.cluster_centers_, cupy.ndarray)

        # Test getattr on trained distributed model

        X, y = load_text_corpus(client)

        print(str(X.compute()))

        nb_model = MultinomialNB(client=client)
        nb_model.fit(X, y)

        assert nb_model.feature_count_ is not None
        assert isinstance(nb_model.feature_count_, cupy.ndarray)

    finally:
        client.close()
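
Example #3 wraps the test body in try/finally so the Client is closed even when an assertion fails. The same guarantee can be written once as a yield fixture; a minimal sketch, reusing the existing cluster fixture (the fixture name is illustrative):

@pytest.fixture
def connected_client(cluster):
    client = Client(cluster)
    try:
        yield client
    finally:
        client.close()
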
Example #4
def test_getattr(cluster):

    client = Client(cluster)

    # Test getattr on local param
    kmeans_model = KMeans(client=client)

    assert kmeans_model.client is not None

    # Test getattr on local_model param with a non-distributed model

    X_cudf, y = make_blobs(5,
                           5,
                           2,
                           2,
                           cluster_std=0.01,
                           verbose=False,
                           random_state=10)

    wait(X_cudf)

    kmeans_model.fit(X_cudf)

    assert kmeans_model.cluster_centers_ is not None
    assert isinstance(kmeans_model.cluster_centers_, cudf.DataFrame)

    # Test getattr on trained distributed model

    X, y = load_text_corpus(client)

    print(str(X.compute()))

    nb_model = MultinomialNB(client=client)
    nb_model.fit(X, y)

    assert nb_model.feature_count_ is not None
    assert isinstance(nb_model.feature_count_, cupy.ndarray)
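
The examples disagree on the container type of cluster_centers_: Example #4 expects a cudf.DataFrame, while Examples #1 and #3 expect a cupy.ndarray, presumably reflecting different cuML releases. A small helper that normalizes either form to a CuPy array (a sketch, assuming cudf and cupy are importable; the function name is illustrative):

def centers_as_cupy(kmeans_model):
    centers = kmeans_model.cluster_centers_
    if isinstance(centers, cudf.DataFrame):
        # cudf.DataFrame.values is already a cupy.ndarray on the device.
        return centers.values
    return cupy.asarray(centers)
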