Example #1
0
def ms_grid(X: np.ndarray,
            y: np.ndarray,
            n_digits: int,
            n_iter: int = 2) -> RandomizedSearchCV:
    """Build a randomized bandwidth search for ``MeanShift``.

    Candidate bandwidths are derived from ``X``: estimates taken at four
    quantiles, fixed multiples of the mean of those estimates, and an
    evenly spaced window around that mean.

    Args:
        X: Samples; used both to estimate bandwidths and for the search.
        y: Targets forwarded to ``utils.grid_search_random``.
        n_digits: Not used by this function.
        n_iter: Forwarded to ``utils.grid_search_random``.

    Returns:
        Whatever ``utils.grid_search_random`` returns.
    """

    def _candidate_bandwidths():
        # One data-driven estimate per quantile in {0.1, 0.4, 0.7, 1.0}.
        estimates = [
            cluster.estimate_bandwidth(X,
                                       quantile=q,
                                       n_samples=None,
                                       random_state=rand)
            for q in np.linspace(0.1, 1., 4)
        ]

        centre = np.mean(estimates)
        # Fixed multiples of the mean estimate.
        scaled = [centre * 0.1, centre * 2, centre * 5, centre * 10]
        # Window of +/-10 around the mean; lower edge floored at 0.01.
        lower_edge = np.maximum(0.01, centre - 10)
        window = list(np.linspace(lower_edge, centre + 10, 5))
        return estimates + scaled + window

    search_space = {'bandwidth': _candidate_bandwidths()}
    return utils.grid_search_random(MeanShift(),
                                    search_space,
                                    X,
                                    y,
                                    utils.evaluate_items,
                                    n_iter,
                                    random_state=rand)
Example #2
0
def wh_grid(X: np.ndarray,
            y: np.ndarray,
            n_digits: int,
            n_iter: int = 10) -> RandomizedSearchCV:
    """Build the search for agglomerative clustering with ward linkage.

    Args:
        X: Samples forwarded to ``utils.grid_search_random``.
        y: Targets forwarded to ``utils.grid_search_random``.
        n_digits: Number of clusters given to the estimator.
        n_iter: Accepted but ignored; the search is always run with
            ``n_iter=1`` because the space holds a single candidate.

    Returns:
        Whatever ``utils.grid_search_random`` returns.
    """
    model = G_AgglomerativeClustering(n_clusters=n_digits)
    # Only one linkage value, hence exactly one parameter combination.
    search_space = {"linkage": ['ward']}
    return utils.grid_search_random(model,
                                    search_space,
                                    X,
                                    y,
                                    utils.evaluate_items,
                                    n_iter=1,
                                    random_state=rand)
Example #3
0
def ap_grid(X: np.ndarray,
            y: np.ndarray,
            n_digits: int,
            n_iter: int = 2,
            max_iter=10) -> RandomizedSearchCV:
    """Build a randomized search for ``AffinityPropagation``.

    Args:
        X: Samples forwarded to ``utils.grid_search_random``.
        y: Targets forwarded to ``utils.grid_search_random``.
        n_digits: Not used by this function.
        n_iter: Forwarded to ``utils.grid_search_random``.
        max_iter: ``max_iter`` given to the ``AffinityPropagation`` estimator.

    Returns:
        Whatever ``utils.grid_search_random`` returns.
    """
    # Damping from 0.5 in steps of 0.1, stopping before 1.0.
    damping_values = np.arange(0.5, 1.0, 0.1)
    # Six evenly spaced points on [1, 30], truncated to integers.
    convergence_values = np.linspace(1, 30, 6).astype(int)
    search_space = {
        "damping": damping_values,
        'convergence_iter': convergence_values,
    }
    model = AffinityPropagation(max_iter=max_iter)
    # This search uses the literal seed 10 rather than the module-level
    # ``rand`` used by the sibling builders.
    return utils.grid_search_random(model,
                                    search_space,
                                    X,
                                    y,
                                    utils.evaluate_items,
                                    n_iter,
                                    random_state=10)
Example #4
0
def km_grid(X: np.ndarray,
            y: np.ndarray,
            n_digits: int,
            n_iter: int = 1_000_000) -> RandomizedSearchCV:
    """Build a randomized search over ``KMeans`` init and algorithm choices.

    Args:
        X: Samples forwarded to ``utils.grid_search_random``.
        y: Targets forwarded to ``utils.grid_search_random``.
        n_digits: Number of clusters given to the estimator.
        n_iter: Forwarded to ``utils.grid_search_random``. The default is
            an integer literal because the parameter is annotated ``int``
            and scikit-learn's randomized search expects an integral
            iteration count; a float such as ``1e6`` may be rejected.

    Returns:
        Whatever ``utils.grid_search_random`` returns.
    """
    # NOTE(review): 'full' is called 'lloyd' in newer scikit-learn
    # releases -- confirm against the pinned version before changing.
    param_dict = {
        'init': ['k-means++', 'random'],
        "algorithm": ['full', 'elkan']
    }
    # NOTE(review): ``rand`` is passed as ``n_init`` and no ``random_state``
    # is set on the estimator -- confirm this is intended.
    estimator = KMeans(n_clusters=n_digits, n_init=rand)
    grid = utils.grid_search_random(estimator,
                                    param_dict,
                                    X,
                                    y,
                                    utils.evaluate_items,
                                    n_iter,
                                    random_state=rand)

    return grid
Example #5
0
def gm_grid(X: np.ndarray,
            y: np.ndarray,
            n_digits: int,
            n_iter: int = 20) -> RandomizedSearchCV:
    """Build a randomized search for ``GaussianMixture``.

    The space covers the number of components, the covariance type and
    the initialisation method.

    Args:
        X: Samples forwarded to ``utils.grid_search_random``.
        y: Targets forwarded to ``utils.grid_search_random``.
        n_digits: Not used by this function.
        n_iter: Forwarded to ``utils.grid_search_random``.

    Returns:
        Whatever ``utils.grid_search_random`` returns.
    """
    component_counts = [1, 2, 3, 4, 5, 8, 12]
    covariance_kinds = ['full', 'tied', 'diag', 'spherical']
    search_space = {
        "n_components": component_counts,
        "covariance_type": covariance_kinds,
        'init_params': ['kmeans', 'random'],
    }
    model = GaussianMixture(random_state=rand)
    return utils.grid_search_random(model,
                                    search_space,
                                    X,
                                    y,
                                    utils.evaluate_items,
                                    n_iter,
                                    random_state=rand)
Example #6
0
def ag_grid(X: np.ndarray,
            y: np.ndarray,
            n_digits: int,
            n_iter: int = 10) -> RandomizedSearchCV:
    """Build a randomized search for non-ward agglomerative clustering.

    The 'ward' linkage is deliberately left out of this space.

    Args:
        X: Samples forwarded to ``utils.grid_search_random``.
        y: Targets forwarded to ``utils.grid_search_random``.
        n_digits: Number of clusters given to the estimator.
        n_iter: Forwarded to ``utils.grid_search_random``.

    Returns:
        Whatever ``utils.grid_search_random`` returns.
    """
    model = G_AgglomerativeClustering(n_clusters=n_digits)
    distance_choices = ['euclidean', 'l1', 'l2', 'manhattan', 'cosine']
    linkage_choices = ['complete', 'average', 'single']
    search_space = {
        "affinity": distance_choices,
        "linkage": linkage_choices,
    }
    return utils.grid_search_random(model,
                                    search_space,
                                    X,
                                    y,
                                    utils.evaluate_items,
                                    n_iter,
                                    random_state=rand)
Example #7
0
def sc_grid(X: np.ndarray,
            y: np.ndarray,
            n_digits: int,
            n_iter: int = 10) -> RandomizedSearchCV:
    """Build a randomized search for spectral clustering.

    The space covers the label-assignment strategy, the affinity and the
    neighbour count.

    Args:
        X: Samples forwarded to ``utils.grid_search_random``.
        y: Targets forwarded to ``utils.grid_search_random``.
        n_digits: Number of clusters given to the estimator.
        n_iter: Forwarded to ``utils.grid_search_random``.

    Returns:
        Whatever ``utils.grid_search_random`` returns.
    """
    # ``rand`` is used for both ``n_init`` and ``random_state`` here.
    model = G_SpectralClustering(n_clusters=n_digits,
                                 n_init=rand,
                                 random_state=rand)
    neighbour_counts = [3, 7, 10, 15, 30, 50]
    search_space = {
        "assign_labels": ['kmeans', 'discretize'],
        "affinity": ['nearest_neighbors', 'rbf'],
        "n_neighbors": neighbour_counts,
    }
    return utils.grid_search_random(model,
                                    search_space,
                                    X,
                                    y,
                                    utils.evaluate_items,
                                    n_iter,
                                    random_state=rand)
Example #8
0
def db_grid(X: np.ndarray,
            y: np.ndarray,
            n_digits: int,
            n_iter: int = 20) -> RandomizedSearchCV:
    """Build a randomized search for DBSCAN.

    The space covers ``eps``, ``min_samples`` and the distance metric.

    Args:
        X: Samples forwarded to ``utils.grid_search_random``.
        y: Targets forwarded to ``utils.grid_search_random``.
        n_digits: Not used by this function.
        n_iter: Forwarded to ``utils.grid_search_random``.

    Returns:
        Whatever ``utils.grid_search_random`` returns.
    """
    # Maximum distance between two points counted as neighbours.
    eps_choices = [0.2, 0.5, 1, 1.5, 3, 5, 10, 20, 50]
    # Minimum neighbourhood size for a core point (the point itself counts).
    min_samples_choices = [2, 5, 7, 9, 10, 20, 50, 100]
    search_space = {
        'eps': eps_choices,
        'min_samples': min_samples_choices,
        'metric': ['euclidean', 'l1', 'l2'],
    }

    model = G_DBSCAN()
    return utils.grid_search_random(model,
                                    search_space,
                                    X,
                                    y,
                                    utils.evaluate_items,
                                    n_iter,
                                    random_state=rand)