Example #1
0
    def __init__(
            self,
            n_clusters=8,
            init='random',
            n_init=1,
            max_iter=300,
            tol=1e-4,
            precompute_distances='auto',
            verbose=0,
            random_state=None,
            copy_x=True,
            n_jobs=1,
            algorithm='auto',
            # Beyond sklearn (with optimal defaults)
            gpu_id=0,
            n_gpus=-1,
            init_data="randomselect",
            do_checks=1,
            backend='auto'):
        """Build the scikit-learn and h2o4gpu estimators and select one.

        Both underlying estimators are always constructed; ``self.model``
        is then pointed at whichever one the backend selection picked.

        Parameters
        ----------
        n_clusters, init, n_init, max_iter, tol, precompute_distances,
        verbose, random_state, copy_x, n_jobs, algorithm
            Forwarded unchanged to both ``KMeansSklearn`` and ``KMeansH2O``.
        gpu_id, n_gpus, init_data, do_checks
            Forwarded unchanged to ``KMeansH2O`` only.
        backend : str
            ``'auto'``, ``'sklearn'`` or ``'h2o4gpu'``. The environment
            variable ``H2O4GPU_BACKEND``, when set, overrides this argument.
            With ``'auto'``, scikit-learn is selected when ``init`` is an
            ndarray or ``"k-means++"``; otherwise h2o4gpu is selected.
            Any unrecognised value results in the h2o4gpu backend.

        Attributes set
        --------------
        do_sklearn : bool
            True when the scikit-learn estimator was selected.
        backend : str
            ``'sklearn'`` or ``'h2o4gpu'``, mirroring ``do_sklearn``.
        model_sklearn, model_h2o4gpu
            The two constructed estimators.
        model
            The selected estimator (one of the two above).
        """
        import os
        # The environment variable takes precedence over the argument.
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend
        assert_is_type(backend, str)

        # FIXME: Add init as array and kmeans++ to h2o4gpu
        # setup backup to sklearn class
        # (can remove if fully implement sklearn functionality)
        self.do_sklearn = False
        if backend == 'auto':
            # isinstance also catches ndarray subclasses, which must not
            # reach the string comparison in the next branch.
            if isinstance(init, np.ndarray):
                print("WARNING: init as ndarray of centers not yet supported."
                      "  Using sklearn.")
                self.do_sklearn = True
            elif init == "k-means++":
                print("WARNING: init as k-means++ not yet supported."
                      "  Using sklearn.")
                self.do_sklearn = True
            # FIXME: Add n_init to h2o4gpu
            if n_init != 1:
                print("WARNING: n_init not supported currently."
                      "  Still using h2o4gpu.")
            if precompute_distances != "auto":
                print("WARNING: precompute_distances not used."
                      "  Still using h2o4gpu.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        # backend == 'h2o4gpu' (or any other value) keeps do_sklearn False.

        self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

        from h2o4gpu.cluster import k_means_
        self.model_sklearn = k_means_.KMeansSklearn(
            n_clusters=n_clusters,
            init=init,
            n_init=n_init,
            max_iter=max_iter,
            tol=tol,
            precompute_distances=precompute_distances,
            verbose=verbose,
            random_state=random_state,
            copy_x=copy_x,
            n_jobs=n_jobs,
            algorithm=algorithm)
        self.model_h2o4gpu = KMeansH2O(
            n_clusters=n_clusters,
            init=init,
            n_init=n_init,
            max_iter=max_iter,
            tol=tol,
            precompute_distances=precompute_distances,
            verbose=verbose,
            random_state=random_state,
            copy_x=copy_x,
            n_jobs=n_jobs,
            algorithm=algorithm,
            # H2O4GPU
            gpu_id=gpu_id,
            n_gpus=n_gpus,
            init_data=init_data,
            do_checks=do_checks)

        if self.do_sklearn:
            self.model = self.model_sklearn
        else:
            self.model = self.model_h2o4gpu
Example #2
0
    def __init__(
            self,
            n_clusters=8,
            init='k-means++',
            n_init=1,
            max_iter=300,
            tol=1e-4,
            precompute_distances='auto',
            verbose=0,
            random_state=None,
            copy_x=True,
            n_jobs=1,
            algorithm='auto',
            # Beyond sklearn (with optimal defaults)
            gpu_id=0,
            n_gpus=-1,
            do_checks=1,
            backend='auto'):
        """Build the scikit-learn and h2o4gpu estimators and select one.

        Both underlying estimators are always constructed; ``self.model``
        is then pointed at one of them. Status messages are passed to
        ``KMeans._print_verbose(verbose, 0, message)``.

        Parameters
        ----------
        n_clusters, init, n_init, max_iter, tol, precompute_distances,
        verbose, random_state, copy_x, n_jobs, algorithm
            Forwarded unchanged to both ``KMeansSklearn`` and ``KMeansH2O``.
        gpu_id, n_gpus, do_checks
            Forwarded unchanged to ``KMeansH2O`` only.
        backend : str
            ``'auto'``, ``'sklearn'`` or ``'h2o4gpu'``. The environment
            variable ``H2O4GPU_BACKEND``, when set, overrides this argument.
            With ``'auto'``, scikit-learn is selected when ``init`` is an
            ndarray; otherwise h2o4gpu is selected. Any unrecognised value
            results in the h2o4gpu backend.

        Attributes set
        --------------
        do_sklearn : bool
            True when the backend selection chose scikit-learn.
        backend : str
            ``'sklearn'`` or ``'h2o4gpu'``, mirroring ``do_sklearn``.
        model_sklearn, model_h2o4gpu
            The two constructed estimators.
        model
            The estimator actually used; this is the scikit-learn one when
            ``do_sklearn`` is set or when ``model_h2o4gpu._load_lib()``
            returns ``None``.
        """
        import os
        # The environment variable takes precedence over the argument.
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend

        # FIXME: Add init as array and kmeans++ to h2o4gpu
        # setup backup to sklearn class
        # (can remove if fully implement sklearn functionality)
        self.do_sklearn = False
        if backend == 'auto':
            # isinstance also routes ndarray subclasses to scikit-learn.
            if isinstance(init, np.ndarray):
                KMeans._print_verbose(
                    verbose, 0,
                    "'init' as ndarray of centers not yet supported. "
                    "Running ScikitLearn CPU version.")
                self.do_sklearn = True
            # FIXME: Add n_init to h2o4gpu
            if n_init != 1:
                KMeans._print_verbose(
                    verbose, 0, "'n_init' not supported. "
                    "Running h2o4gpu with n_init = 1.")
            if precompute_distances != "auto":
                KMeans._print_verbose(verbose, 0,
                                      "'precompute_distances' not used.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        # backend == 'h2o4gpu' (or any other value) keeps do_sklearn False.

        self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

        from h2o4gpu.cluster import k_means_
        self.model_sklearn = k_means_.KMeansSklearn(
            n_clusters=n_clusters,
            init=init,
            n_init=n_init,
            max_iter=max_iter,
            tol=tol,
            precompute_distances=precompute_distances,
            verbose=verbose,
            random_state=random_state,
            copy_x=copy_x,
            n_jobs=n_jobs,
            algorithm=algorithm)
        self.model_h2o4gpu = KMeansH2O(
            n_clusters=n_clusters,
            init=init,
            n_init=n_init,
            max_iter=max_iter,
            tol=tol,
            precompute_distances=precompute_distances,
            verbose=verbose,
            random_state=random_state,
            copy_x=copy_x,
            n_jobs=n_jobs,
            algorithm=algorithm,
            # H2O4GPU
            gpu_id=gpu_id,
            n_gpus=n_gpus,
            do_checks=do_checks)
        # _load_lib() is only consulted when h2o4gpu was selected; a None
        # result falls back to the scikit-learn estimator.
        # NOTE(review): on that fallback self.backend and self.do_sklearn
        # still say h2o4gpu -- confirm whether callers rely on this.
        # pylint: disable=protected-access
        if self.do_sklearn or self.model_h2o4gpu._load_lib() is None:
            self.model = self.model_sklearn
            KMeans._print_verbose(verbose, 0, "Using ScikitLearn backend.")
        else:
            self.model = self.model_h2o4gpu
            KMeans._print_verbose(verbose, 0, "Using h2o4gpu backend.")