Code example #1
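Loads a feature matrix from FeatureMatrix_number.csv, fits complete-linkage agglomerative clustering with 11 clusters, and reads the fitted estimator's constructor settings back with get_params(deep=True).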
def clusterRegularVerbs_AgglomerativeClustering():
    from sklearn.cluster import AgglomerativeClustering
    from numpy import genfromtxt

    # Load the feature matrix, skipping the CSV header row.
    my_data = genfromtxt('FeatureMatrix_number.csv', delimiter=',', skip_header=1)

    # Complete-linkage agglomerative clustering into 11 clusters.
    clustering = AgglomerativeClustering(linkage='complete', n_clusters=11)
    clustering.fit(my_data)

    # get_params(deep=True) returns the estimator's constructor parameters as a dict.
    params = clustering.get_params(deep=True)
    return clustering
Code example #2
File: eda.py Project: guptaanmol184/craved
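A method from the project's analysis class: it builds an AgglomerativeClustering instance from a cluster count plus optional keyword arguments, fits it on self.data, and stores get_params() together with the resulting labels; label_cnt_dict and print_dict are helpers defined elsewhere in the project.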
    def perform_hierarchial(self, no_clusters, params=None):
        # Avoid a shared mutable default; params holds any extra keyword
        # arguments (linkage, affinity, ...) for the clusterer.
        hierarchial_clusterer = AgglomerativeClustering(n_clusters=no_clusters,
                                                        **(params or {}))
        # scikit-learn's fit() only accepts the data matrix.
        hierarchial_clusterer.fit(self.data)

        self.hierarchial_results = {
            "parameters": hierarchial_clusterer.get_params(),
            "labels": hierarchial_clusterer.labels_,
            "n_clusters": no_clusters,
            'clusters': label_cnt_dict(hierarchial_clusterer.labels_)
        }

        print_dict(self.hierarchial_results)
Code example #3
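A baseline that clusters a coordinate matrix with average-linkage agglomerative clustering, prints the parameters returned by get_params(), and optionally plots the labelled points via a project-level plot helper.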
def baseline_ac(data_matrix, k, plot_params={'plotit':True,
        'x': 'latitude',
        'y': 'longitude',
        'show': True}):
    # Baseline AC clusters using banned library
    model = AgglomerativeClustering(n_clusters=k, affinity='euclidean', linkage='average')
    labels = model.fit_predict(data_matrix)
    print('Model Params: ')
    print(model.get_params())
    if plot_params['plotit']:
        x = plot_params['x']
        y = plot_params['y']
        # 'title' is not in the default plot_params, so fall back to an empty string.
        plot(data_matrix, labels, title=plot_params.get('title', ''),
             show=plot_params['show'], xlabel=x, ylabel=y)
    return
Code example #4
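A parameter sweep over affinities, linkages, and cluster counts; for each combination the snippet stores get_params() (minus bookkeeping entries) in an info dict next to the estimator. The snippet is cut off at the start of the k-means branch; note also that the affinity argument has since been renamed to metric in current scikit-learn releases.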
affinities = ('cosine', )
linkages = ('complete', 'average')
n_clusterss = (5, 10, 20, 30)
tau = 0.9

methods = list()

if algo == "agglomerative":
    # agglomerative
    for affinity, linkage, n_clusters in itertools.product(
            affinities, linkages, n_clusterss):
        agg = AgglomerativeClustering(n_clusters=n_clusters,
                                      affinity=affinity,
                                      linkage=linkage)
        params = agg.get_params()
        # Keep only the clustering hyper-parameters; 'pooling_func' exists
        # only in older scikit-learn releases, so pop defensively.
        params.pop('memory', None)
        params.pop('pooling_func', None)
        info = {
            'name': 'Agglomerative Clustering',
            "rep": rep,
            "rep_params": rep_params,
            'params': params
        }

        methods.append((agg, info))
elif algo == "kmeans" or algo == "k-means":
    # k-means
    for n_clusters in n_clusterss:
        km = KMeans(n_clusters=n_clusters,
                    n_jobs=-1,
Code example #5
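A report-building wrapper: it fits agglomerative clustering on the selected input columns, turns get_params() into a Parameter/Value table, renders sample and PCA plots into a markdown report, and returns the labelled table together with the model. check_col_type, BrtcReprBuilder, pandasDF2MD and the plotting helpers are defined elsewhere in the project.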
def _agglomerative_clustering(table,
                              input_cols,
                              prediction_col='prediction',
                              linkage='ward',
                              affinity='euclidean',
                              n_clusters=2,
                              compute_full_tree_auto=True,
                              compute_full_tree=None):
    feature_names, inputarr = check_col_type(table, input_cols)
    _compute_full_tree = 'auto' if compute_full_tree_auto else compute_full_tree
    _affinity = 'euclidean' if linkage == 'ward' else affinity

    ac = AgglomerativeClustering(linkage=linkage,
                                 affinity=_affinity,
                                 n_clusters=n_clusters,
                                 compute_full_tree=_compute_full_tree)

    ac.fit(inputarr)

    label_name = {
        'linkage': 'Linkage',
        'affinity': 'Affinity',
        'n_clusters': 'N Clusters',
        'compute_full_tree': 'Compute Full Tree'
    }
    get_param = ac.get_params()
    # Two-column Parameter/Value table built from the fitted estimator's settings.
    param_table = pd.DataFrame({
        'Parameter': list(label_name.values()),
        'Value': [get_param[x] for x in label_name]
    })

    labels = ac.labels_
    colors = cm.nipy_spectral(np.arange(n_clusters).astype(float) / n_clusters)

    if len(feature_names) > 1:
        pca2_model = PCA(n_components=2).fit(inputarr)
        pca2 = pca2_model.transform(inputarr)
    fig_samples = _agglomerative_clustering_samples_plot(
        labels, table, input_cols, 100, n_clusters, colors) if len(
            table.index) > 100 else _agglomerative_clustering_samples_plot(
                labels, table, input_cols, None, n_clusters, colors)

    if len(feature_names) > 1:
        fig_pca = _agglomerative_clustering_pca_plot(labels, pca2_model, pca2,
                                                     n_clusters, colors)
        rb = BrtcReprBuilder()
        rb.addMD(
            strip_margin("""
        | ## Agglomerative Clustering Result
        | ### Samples
        | {fig_samples}
        | {fig_pca}
        | ### Parameters
        | {params}
        """.format(fig_pca=fig_pca,
                   fig_samples=fig_samples,
                   params=pandasDF2MD(param_table))))
    else:
        rb = BrtcReprBuilder()
        rb.addMD(
            strip_margin("""
        | ## Agglomerative Clustering Result
        | - Samples
        | {fig_samples}
        | ### Parameters
        | {params}
        """.format(fig_samples=fig_samples, params=pandasDF2MD(param_table))))

    model = _model_dict('agglomerative_clustering')
    model['model'] = ac
    model['input_cols'] = input_cols
    model['_repr_brtc_'] = rb.get()

    out_table = table.copy()
    out_table[prediction_col] = labels
    return {'out_table': out_table, 'model': model}
Code example #6
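Reads selected feature columns from a delimited text file and fits a two-cluster model; get_params() only reports constructor parameters and does not take the data as an argument.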
from sklearn.cluster import AgglomerativeClustering
import numpy as np

data = np.loadtxt(r'sp3combined.test.txt',
                  delimiter=',',
                  skiprows=19995,
                  usecols=(4, 5, 6, 7, 8, 9, 10))

# AgglomerativeClustering needs the full 2-D sample matrix, not one row at a time.
estimator = AgglomerativeClustering(n_clusters=2)
labels = estimator.fit_predict(data)

# get_params() takes no data argument; it returns the constructor parameters as a dict.
params = estimator.get_params()
Code example #7
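A minimal end-to-end run on a small toy matrix: fit, inspect get_params() and the fitted attributes (labels_, n_leaves_, children_, ...), then call fit_predict on two new points, which re-fits because the estimator has no separate predict method.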
import numpy as np
from sklearn.cluster import AgglomerativeClustering

# =======================================================================
# data
# =======================================================================
X = np.array([[1, 2],
              [1, 4],
              [1, 0],
              [4, 2],
              [4, 4],
              [4, 0]])

X_test = np.array([[1, 3],
                   [2, 4]])

# =======================================================================
# clustering
# =======================================================================
clustering = AgglomerativeClustering()
clustering.fit(X)


print(clustering)
print(clustering.get_params())
print(clustering.labels_)
print(clustering.n_leaves_)
# n_components_ was renamed to n_connected_components_ in newer scikit-learn releases.
print(clustering.n_connected_components_)
print(clustering.children_)

# AgglomerativeClustering has no separate predict(); fit_predict() re-fits on X_test.
predictions = clustering.fit_predict(X_test)
print(predictions)