def fit(self, draw=False, p=5):
    self.p = p
    self.draw = draw
    hie = None
    if draw:
        # for drawing: compute the full tree so a dendrogram can be built
        hie = AC(n_clusters=None, compute_full_tree=True, distance_threshold=0)
    else:
        hie = AC(n_clusters=self.nc)
    t0 = time.time()
    hie.fit(self.vectors)
    print("Training cost %0.3fs" % (time.time() - t0))
    self.model = hie
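# Hedged follow-up sketch (not part of the original class): if the model above was
# fitted with draw=True, its children_ and distances_ attributes can be converted
# into a SciPy linkage matrix for dendrogram plotting. The helper name is illustrative.
import numpy as np
from scipy.cluster.hierarchy import dendrogram

def draw_dendrogram(model):
    # count how many original samples fall under each merge node
    counts = np.zeros(model.children_.shape[0])
    n_samples = len(model.labels_)
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count
    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]).astype(float)
    dendrogram(linkage_matrix)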
def process_and_plot(df, shrink, ix=None):
    df = z_scale(df.T).T
    if ix is None:
        ix = AC(4).fit(df).labels_.argsort()  # a trick to make better heatmaps
    cap = np.min([np.max(df.values), np.abs(np.min(df.values))])
    df = np.clip(df, -1 * cap, cap)
    custom_heatmap(df, shrink, ix=ix)
def __init__(self, n_clusters=2, affinity='euclidean', memory=None,
             connectivity=None, compute_full_tree='auto', linkage='ward',
             distance_threshold=None, compute_distances=False):
    self.n_clusters = n_clusters
    self.memory = memory
    self.compute_distances = compute_distances
    self.affinity = affinity
    self.linkage = linkage
    self.distance_threshold = distance_threshold
    self.connectivity = connectivity
    self.compute_full_tree = compute_full_tree
    self.model = AC(compute_distances=self.compute_distances,
                    distance_threshold=self.distance_threshold,
                    affinity=self.affinity,
                    connectivity=self.connectivity,
                    linkage=self.linkage,
                    n_clusters=self.n_clusters,
                    memory=self.memory,
                    compute_full_tree=self.compute_full_tree)
def hc_cluster_score(X, g):
    """Calculate the silhouette score for every candidate number of clusters
    produced by hierarchical clustering.

    Input:
        g: graph object
        X: precomputed distance matrix
    Output:
        Dict mapping each node to its label in the best-scoring clustering
        List of silhouette scores
    """
    scores = []
    labels = []
    for i in range(2, len(X)):
        hierarchical_model = AC(n_clusters=i, affinity='precomputed',
                                linkage='average').fit(X)
        l = hierarchical_model.labels_
        s = silhouette_score(X, l, metric="precomputed")
        scores.append(s)
        labels.append(l)
    idx = np.argmax(scores)
    clust_lab = {
        list(g.nodes())[i]: list(labels[idx])[i]
        for i in range(len(labels[idx]))
    }
    return clust_lab, scores
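# Hedged usage sketch (illustrative only): the graph and the way the precomputed
# distance matrix is built here are assumptions, not part of the original source.
# Assumes hc_cluster_score and its imports (AC, silhouette_score, np) are in scope.
import networkx as nx
import numpy as np

g = nx.karate_club_graph()
nodes = list(g.nodes())
# pairwise shortest-path lengths as a precomputed distance matrix
X = np.array([[nx.shortest_path_length(g, a, b) for b in nodes] for a in nodes])
clust_lab, scores = hc_cluster_score(X, g)
print('best silhouette:', max(scores), 'with', len(set(clust_lab.values())), 'clusters')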
def final_plot(dataset, num=None):
    scaled_dataset = scale_dataset(dataset.T).T
    if num is None:
        num = AC(4).fit(scaled_dataset).labels_.argsort()
    cap = np.min([np.max(scaled_dataset.values),
                  np.abs(np.min(scaled_dataset.values))])
    scaled_dataset = np.clip(scaled_dataset, -1 * cap, cap)
    plotting_heatmap(scaled_dataset, num=num)
def HAC():
    hac = AC(n_clusters=32).fit(feature)
    pred_label = hac.labels_
    print('silhouette_score = ',
          metrics.silhouette_score(feature, pred_label, metric='euclidean'))
    print('homogeneity_score = ',
          metrics.homogeneity_score(label, pred_label.tolist()))
    # print(hac.labels_)
    folder_result('HAC', pred_label)
def clusterize(self):
    print("Starting clustering...")
    t1 = time.monotonic()
    agg_clus = AC(n_clusters=self.num_clus, affinity=self.affinity,
                  linkage=self.linkage)
    self.predictions = agg_clus.fit_predict(self.feature_matrix)
    print("Done training in {}s".format(
        timedelta(seconds=time.monotonic() - t1)))
def hierarchical(n, img):
    # flatten the image to a (pixels x channels) matrix
    Z_2 = img.reshape((-1, len(img[0][0])))
    # print(Z_2)
    # ac_model = AC(n_clusters=n, linkage='average', compute_full_tree='false', affinity='cosine')
    ac_model = AC(n_clusters=n)
    ac_labels = ac_model.fit_predict(Z_2)
    img_labels_3 = ac_labels.reshape((img.shape[0], img.shape[1]))
    return img_labels_3
def scale_and_plot(df, ix=None):
    '''
    A wrapper function that scales the values within each row of df
    and passes the result to plot_hmap.
    '''
    df_marginal_scaled = scale_df(df.T).T
    if ix is None:
        ix = AC(4).fit(df_marginal_scaled).labels_.argsort()
    cap = np.min([np.max(df_marginal_scaled.values),
                  np.abs(np.min(df_marginal_scaled.values))])
    df_marginal_scaled = np.clip(df_marginal_scaled, -1 * cap, cap)
    plot_hmap(df_marginal_scaled, ix=ix)
    return df_marginal_scaled
def clustering(k, x, series_avg, met):
    if met == "KM":
        res = KMeans(k).fit(series_avg)
        return res.cluster_centers_, res.labels_
    elif met == "AC":
        res = AC(n_clusters=k, linkage="complete").fit(series_avg)
        cent = np.array(
            [np.mean(series_avg[res.labels_ == i], axis=0) for i in range(k)])
        return cent, res.labels_
    elif met == "KS":
        label = DTW.KShape(k, series_avg)
        cent = np.array(
            [np.mean(series_avg[label == i], axis=0) for i in range(k)])
        return cent, label
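# Hedged usage sketch (illustrative only): series_avg is assumed to be a 2-D array of
# averaged series; x is unused by the "AC" branch. Assumes the function above and its
# imports (AC, np) are in scope.
import numpy as np

rng = np.random.default_rng(0)
series_avg = rng.normal(size=(50, 20))
centers, labels = clustering(k=3, x=None, series_avg=series_avg, met="AC")
print(centers.shape, np.bincount(labels))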
def clustering(filtered, th_clust):
    """
    Clusters data using Agglomerative Clustering with a distance threshold
    (a threshold of 30 is the usual default here).
    """
    from sklearn.cluster import AgglomerativeClustering as AC
    agC = AC(n_clusters=None, distance_threshold=th_clust, memory=None)
    X = np.array(filtered.iloc[:, :3])
    agC.fit(X)
    labels = np.array([agC.labels_]).T
    # res = np.concatenate((X, np.array([agC.labels_]).T), axis=1)
    amountclusters = len(set(agC.labels_))
    print('Amount of clusters: ' + str(amountclusters))
    return labels, amountclusters
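# Hedged usage sketch (illustrative only): filtered is assumed to be a DataFrame whose
# first three columns are coordinates; the threshold of 30 matches the docstring above.
import numpy as np
import pandas as pd

filtered = pd.DataFrame(np.random.rand(100, 3) * 100, columns=['x', 'y', 'z'])
labels, amountclusters = clustering(filtered, th_clust=30)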
def log_anomalyPRF_AC(cp, ground_truth, dataset, log_flag, SEED=1234):
    # Init clustering hyperparameters
    n_clusters = cp.getint('Hyperparameters', 'ClusterNum')
    cluster_init = cp.getint('Hyperparameters', 'ClusterInit')
    # KMeans model
    # km = KMeans(n_clusters=n_clusters, n_init=cluster_init, n_jobs=-1,
    #             random_state=SEED)
    km = AC(n_clusters=n_clusters)
    if isinstance(dataset, str):
        pred = km.fit_predict(np.load(dataset))
    else:
        pred = km.fit_predict(dataset)
    pred = assign_labels(pred, ground_truth)
    print(CR(ground_truth, pred))
def log_NMI_AC(cp, ground_truth, dataset, log_flag, SEED=1234):
    # Init clustering hyperparameters
    n_clusters = cp.getint('Hyperparameters', 'ClusterNum')
    cluster_init = cp.getint('Hyperparameters', 'ClusterInit')
    # KMeans model
    # km = KMeans(n_clusters=n_clusters, n_init=cluster_init, n_jobs=-1,
    #             random_state=SEED)
    km = AC(n_clusters=n_clusters)
    if isinstance(dataset, str):
        pred = km.fit_predict(np.load(dataset))
    else:
        pred = km.fit_predict(dataset)
    log('--------------- {} {} ------------------------'.format(
        log_flag, NMI(ground_truth, pred)))
def perform_clustering(npop: int, coordinates, simulation) -> 'np.ndarray[int]':
    """Perform agglomerative clustering for the simulation.

    Return the labels inferred for each row of the coordinate matrix.
    """
    if simulation.output_level >= 1:
        print('clustering will be performed on a ' + str(coordinates.shape) +
              ' matrix')
    clusterer = AC(n_clusters=npop, compute_full_tree=True, linkage="ward")
    lab_infered = clusterer.fit_predict(coordinates)
    return lab_infered
def agglomerative_propagation(matrix, n_cluster, words):
    start = t.time()
    affinity = AC(affinity="precomputed", n_clusters=n_cluster,
                  linkage="complete", compute_full_tree=True)
    affinity.fit(matrix)
    clusters = []
    for index in range(0, n_cluster):
        lista = []
        clusters.append(lista)
    for index in range(0, len(words)):
        clusters[affinity.labels_[index]].append(words[index])
    end = t.time()
    return affinity, clusters, end - start
def twoDimension(data, nclusters, linkage_type):
    arrs = []
    for line in data:
        arrs.append(xy(line))
    nparr = np.asarray(arrs)
    # getDendogram(nparr, linkage_type)
    # Agglomerative clustering
    hc = AC(n_clusters=nclusters, affinity='euclidean', linkage=linkage_type)
    y_hc = hc.fit_predict(nparr)
    print("CLUSTER ZERO:", nparr[y_hc == 0])
    print("CLUSTER ONE:", nparr[y_hc == 1])
    print("CLUSTER TWO:", nparr[y_hc == 2])
    print("CLUSTER THREE:", nparr[y_hc == 3])
    plt.scatter(nparr[y_hc == 0, 0], nparr[y_hc == 0, 1], s=100, c='red')
    plt.scatter(nparr[y_hc == 1, 0], nparr[y_hc == 1, 1], s=100, c='black')
    plt.scatter(nparr[y_hc == 2, 0], nparr[y_hc == 2, 1], s=100, c='blue')
    plt.scatter(nparr[y_hc == 3, 0], nparr[y_hc == 3, 1], s=100, c='cyan')
    plt.show()
def clustering(idTfidf, num_clu, term_num):
    docFeature = idTfidf
    vecTfidf = {}
    for file in idTfidf:
        row = np.zeros(len(idTfidf[file]))
        col = list(idTfidf[file].keys())
        val = list(idTfidf[file].values())
        vec = csc_matrix((np.array(val), (np.array(row), np.array(col))),
                         shape=(1, term_num))
        vecTfidf[file] = vec.todense().tolist()[0]
    # print(vecTfidf)
    features = list(vecTfidf.values())
    selection = 'GM'  # selecting model here!!! Options: AgglomerativeClustering as AC, SpectralClustering as SC, GMM, GM
    if selection == 'AC':
        model = AC(n_clusters=num_clu, affinity='cosine', linkage='average')
    if selection == 'SC':
        model = SC(n_clusters=num_clu, affinity='cosine')
    if selection == 'GMM':
        model = GMM(n_components=num_clu, covariance_type='full')
    if selection == 'GM':
        model = GM(n_components=num_clu)
        model.fit(features)
        res = model.predict(features)
    else:
        res = model.fit_predict(features)
    resDic = {}
    for i in range(len(res)):
        if res[i] not in resDic:
            resDic[res[i]] = []
        resDic[res[i]].append(int(list(docFeature.keys())[i]))
    result = list(resDic.values())
    # print(result)
    with open('gt_GMRes.json', 'w') as f:
        f.write(json.dumps(result))
    return result
def choose_k(X, k_range):
    X = X.T
    X = X[:32]
    print(X.shape)
    X_mean = sum(X) / len(X)
    chs = []
    n = len(X)
    for k in range(2, k_range):
        clf = AC(n_clusters=k, linkage='average')
        clf.fit(X)
        labels = clf.labels_
        centroids = np.zeros((k, len(X[0])))
        counts = np.zeros((k, 1))
        for i in range(n):
            for l in range(k):
                if l == labels[i]:
                    centroids[l] += X[i]
                    counts[l][0] += 1
        centroids /= counts
        W = 0
        B = 0
        for label in range(k):
            for i in range(len(X)):
                if labels[i] == label:
                    W += np.linalg.norm((X[i] - centroids[label]), 2) ** 2
            B += counts[label][0] * (np.linalg.norm((centroids[label] - X_mean), 2) ** 2)
        up = B / (k - 1)
        down = W / (n - k)
        chs.append(up / down)
    plt.figure()
    plt.plot([i + 2 for i in range(len(chs))], chs)
    plt.xlabel('k')
    plt.ylabel('ch value')
    plt.title('Choose best k')
    plt.show()
def getClustersSK(self, X, method="single"):
    """
    Get the model and labels for every possible clustering from the built-in
    sklearn function for agglomerative clustering. No connectivity matrix is
    used, since the data are (apparently) unstructured.
    k = [1, N-1], where N is the number of observations.

    :type X: array-like
    :param X: 2D array containing the x, y coordinates of the points to be
        clustered.
    :type method: str, optional
    :param method: method for linking clusters. Defaults to 'single'.
    """
    Mmax = len(X) - 1
    M = np.arange(1, Mmax + 1)
    L = {}
    for k in M:
        model = AC(n_clusters=k, linkage=method, affinity="euclidean").fit(X)
        L.update({k: model.labels_})
    return L
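# Hedged usage sketch (illustrative only): getClustersSK is treated here as a
# standalone function (self is unused), which is an assumption for demonstration.
import numpy as np

rng = np.random.default_rng(1)
pts = rng.normal(size=(20, 2))
labels_by_k = getClustersSK(None, pts, method="average")
print(len(labels_by_k), 'clusterings for k = 1 ..', len(pts) - 1)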
def fiveDimension(data, nclusters, linkage_type):
    arrs = []
    for line in data:
        coor = line.split()
        d1 = float(coor[0])
        d2 = float(coor[1])
        d3 = float(coor[2])
        d4 = float(coor[3])
        d5 = float(coor[4])
        arrs.append([d1, d2, d3, d4, d5])
    nparr = np.asarray(arrs)
    # getDendogram(nparr, linkage_type)
    hc = AC(n_clusters=nclusters, affinity='euclidean', linkage=linkage_type)
    y_hc = hc.fit_predict(nparr)
    print(nparr[y_hc == 0][0, 0])
    plt.scatter(nparr[y_hc == 0, 0], nparr[y_hc == 0, 1], s=100, c='red')
    plt.scatter(nparr[y_hc == 1, 0], nparr[y_hc == 1, 1], s=100, c='black')
    plt.scatter(nparr[y_hc == 2, 0], nparr[y_hc == 2, 1], s=100, c='blue')
    plt.scatter(nparr[y_hc == 3, 0], nparr[y_hc == 3, 1], s=100, c='cyan')
    plt.show()
def unsupervised_clu(feature, part, model_selection):
    if part:
        if feature == 'graph':
            docFeature = json.loads(open('rmMultiPart1WOZeroGraph.json').read())
        if feature == 'doc2vec':
            docFeature = json.loads(open('rmMultiPart1Doc2vec.json').read())
        if feature == 'comb':
            walk = json.loads(open('rmMultiPart1WOZeroGraph.json').read())
            dv = json.loads(open('rmMultiPart1Doc2vec.json').read())
            docFeature = {}
            for doc in walk:
                val = walk[doc] + dv[doc]
                docFeature[doc] = val
        groundTruth = json.loads(open('rmMultiPart1CluInd.json').read())
        num_clu = len(groundTruth)  # number of clusters in each part
    else:
        rmMulti = True  # False
        if rmMulti:
            if feature == 'graph':
                docFeature = json.loads(open('rmMultiCluDatabaseWOZeroGraph.json').read())
            if feature == 'doc2vec':
                docFeature = json.loads(open('rmMultiCluDatabaseDoc2vec.json').read())
            if feature == 'comb':
                walk = json.loads(open('rmMultiCluDatabaseWOZeroGraph.json').read())
                dv = json.loads(open('rmMultiCluDatabaseDoc2vec.json').read())
                docFeature = {}
                for doc in walk:
                    val = walk[doc] + dv[doc]
                    docFeature[doc] = val
            groundTruth = json.loads(open('rmMultiGroundTruth.json').read())
            # number of clusters after removing documents appearing in multiple
            # clusters, #doc = 1274 (3 all 0s for walk)
            num_clu = len(groundTruth)
        else:
            if feature == 'graph':
                docFeature = json.loads(open('cluDatabaseWOZeroGraph.json').read())
            if feature == 'doc2vec':
                docFeature = json.loads(open('cluDatabaseDoc2vec.json').read())
            if feature == 'comb':
                walk = json.loads(open('cluDatabaseWOZeroGraph.json').read())
                dv = json.loads(open('cluDatabaseDoc2vec.json').read())
                docFeature = {}
                for doc in walk:
                    val = walk[doc] + dv[doc]
                    docFeature[doc] = val
            groundTruth = json.loads(open('groundTruth.json').read())
            # number of clusters before removing documents appearing in multiple
            # clusters, #doc = 1393 (3 all 0s for walk)
            num_clu = len(groundTruth)
    features = list(docFeature.values())
    if model_selection == 'AC':
        model = AC(n_clusters=num_clu, affinity='cosine', linkage='average')
    if model_selection == 'SC':
        model = SC(n_clusters=num_clu, affinity='cosine')
    if model_selection == 'GMM':
        model = GMM(n_components=num_clu, covariance_type='full')
    if model_selection == 'KMeans':
        model = KMeans(n_clusters=num_clu)
    if model_selection == 'GM':
        model = GM(n_components=num_clu)
        model.fit(features)
        res = model.predict(features)
    else:
        res = model.fit_predict(features)
    resDic = {}
    for i in range(len(res)):
        if res[i] not in resDic:
            resDic[res[i]] = []
        resDic[res[i]].append(int(list(docFeature.keys())[i]))
    result = list(resDic.values())
    return (result, groundTruth)
def unsupervised(numClu, graphEmb):
    print('Building unsupervised model...')
    model = AC(n_clusters=numClu, affinity='cosine', linkage='complete')
    res = model.fit_predict(list(graphEmb.values()))
    return res
import time

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering as AC
from sklearn.decomposition import PCA

tfidf = pd.read_csv('tfidf.csv')
data = tfidf.values[:, 1:]
numClass = 4
AC_model = AC(n_clusters=numClass, affinity="euclidean", linkage='ward')  # hierarchical clustering

pca = PCA(n_components=10)
TnewData = pca.fit_transform(data)  # reduce dimensionality first
t0 = time.time()
AC_model.fit(TnewData)  # then cluster
elapsed_time = time.time() - t0

pca = PCA(n_components=2)  # project to two components for a 2-D plot
newData = pca.fit_transform(data)  # map the N-dimensional data down to 2-D
result = AC_model.labels_  # labels_ holds the clustering result
plt.scatter(newData[:, 0], newData[:, 1], c=result, cmap=plt.cm.nipy_spectral)
plt.show()
print("time(s):", elapsed_time)
for x in l:
    x1 = float(x.split(" ")[0])
    x2 = float(x.split(" ")[1])
    points.append([x1, x2])
points = np.array(points)
plt.scatter(points[:, 0], points[:, 1])
plt.show()

kmean = KMeans(n_clusters=2, random_state=42).fit(points)
plt.scatter(points[:, 0], points[:, 1], c=kmean.labels_)
plt.show()

aclust = AC(n_clusters=2)
aclust.fit(points)
plt.scatter(points[:, 0], points[:, 1], c=aclust.labels_)
plt.show()

dbc = DBSCAN()
dbc.fit(points)
plt.scatter(points[:, 0], points[:, 1], c=dbc.labels_)
plt.show()

file.close()
file = open('./data/Ring/2D_points.txt')
l = file.readlines()
ListVal.append(ListF[a:b])
a = b
b = b + 7

# Drop NaN entries and (1, 1) descriptors
ListKey = [y for x, y in zip(ListVal, ListKey)
           if not (math.isnan(x[0]) or (x[0] == 1 and x[1] == 1))]
ListVal = [x for x in ListVal
           if not (math.isnan(x[0]) or (x[0] == 1 and x[1] == 1))]
DictF = {x: y for x, y in zip(ListKey, ListVal)}
os.chdir('/media/roman/10A2FE37A2FE20C0/Clustering/')  # path to image
print('Processed {} descriptors'.format(len(DictF)))

for i in range(2, 101):
    agg = AC(n_clusters=i, linkage='average')
    assignment = agg.fit_predict(ListVal)
    result = Counter(assignment)
    clustElem = {}
    for ind, val in enumerate(assignment):
        if val + 1 not in clustElem.keys():
            clustElem[val + 1] = [ListKey[ind]]
        else:
            clustElem[val + 1].append(ListKey[ind])
    clustMedian = {i[0]: i[1][len(i[1]) // 2] for i in clustElem.items()}
    print('========== {} level =========='.format(i - 1))
    print('{} clusters'.format(i))
    cE = list(clustElem.items())
    cE.sort()
    for j in cE:
    'xticks': (),
    'yticks': ()
})
for i, (component, ax) in enumerate(zip(y_people[ind], axes.ravel())):
    ax.imshow(component.reshape(image_shape), cmap='gray')
    ax.set_title("{}. component".format(i + 1))

dbs = DBS()
dbs.fit(X_people)
dbs_assignments = dbs.labels_
dbs_means = dbs.core_sample_indices_
print(len(dbs_means))

a = 100
ac = AC(n_clusters=a)
ac.fit(X_people)
ac_assignments = ac.labels_
ac_means = ac.n_clusters
print(ac_means)
for i in range(a):
    ind = ac_assignments == i
    ent = entropy(y_people[ind])
    print("Cluster {:d}, size = {:d}, entropy = {:.3f}".format(
        i, np.sum(ind), ent))
    if ent > 4 and np.sum(ind) > 10:
        fig, axes = plt.subplots(2, 5, figsize=(15, 8), subplot_kw={
    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]).astype(float)

    # Plot the corresponding dendrogram
    dendrogram(linkage_matrix, **kwargs)
    # Add axis labels
    plt.xlabel('Data Point')
    plt.ylabel('Distance')


"""# Agglomerative Clustering with TSNE"""

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering as AC

ac = AC(n_clusters=None, distance_threshold=0)
ac.fit(X)
plt.figure(figsize=(12, 6))
plot_dendrogram(ac)
plt.xticks([])
plt.show()

from sklearn.manifold import TSNE

tsne = TSNE(random_state=146)
Xtsne = tsne.fit_transform(X)
n = 2
ac = AC(n_clusters=n)
clusters = ac.fit_predict(X)
colors = GetColors(n)
h5_path = '/media/bigdata/Abuzar_Data/AM23/AM23_4Tastes_200316_134649/AM23_4Tastes_200316_134649_repacked.bk'
h5_file = tables.open_file(h5_path, 'r')
unit_descriptors = h5_file.root.unit_descriptor[:]
sorted_units_path = '/sorted_units'
unit_num = 3
this_unit_waves = h5_file.get_node(
    os.path.join(sorted_units_path, 'unit{0:03d}'.format(unit_num),
                 'waveforms'))[:]
this_unit_pca = pca(n_components=3).fit_transform(this_unit_waves)
ac_cluster = AC().fit(this_unit_pca)
kmeans_cluster = kmeans(n_clusters=3).fit(this_unit_pca)
clust_method = ac_cluster
mean_wavs = [(np.mean(this_unit_waves[clust_method.labels_ == clust], axis=0),
              np.std(this_unit_waves[clust_method.labels_ == clust], axis=0))
             for clust in np.sort(np.unique(clust_method.labels_))]
img_plot(this_unit_waves[np.argsort(kmeans_cluster.labels_)])
plt.show()
for wav in mean_wavs:
    plt.fill_between(range(len(wav[0])),
                     wav[0] + 2 * wav[1],
                     wav[0] - 2 * wav[1],
# importing the dataset
dataset = pd.read_csv('Mall_customers.csv')
X = dataset.iloc[:, [3, 4]].values

# using the dendrogram to find the optimal number of clusters
import scipy.cluster.hierarchy as sch

dendrogram = sch.dendrogram(sch.linkage(X, method='ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean Distance')
plt.show()

# fitting the hierarchical clustering to the dataset
from sklearn.cluster import AgglomerativeClustering as AC

hc = AC(n_clusters=5, affinity='euclidean', linkage='ward')
y_hc = hc.fit_predict(X)

# visualizing the clusters
plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s=100, c='red', label='Careful')
plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s=100, c='blue', label='Standard')
plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s=100, c='green', label='Target')
plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s=100, c='cyan', label='Careless')
# plt.imshow(k_means(10, new), cmap=plt.get_cmap('hot'))
# plt.colorbar()
# plt.show()

# Z_2 = b.reshape((-1, len(b[0][0])))
# print(Z_2)
# gmm_model = GMM(n_components=4, covariance_type='tied').fit(Z_2)
# gmm_labels = gmm_model.predict(Z_2)
# img_labels_2 = label.reshape((b.shape[0], b.shape[1]))

scale_percent = 40  # percent of original size
width = int(rgb.shape[1] * scale_percent / 100)
height = int(rgb.shape[0] * scale_percent / 100)
dim = (width, height)
resized = cv2.resize(rgb, dim, interpolation=cv2.INTER_AREA)
Z_2 = resized.reshape((-1, len(resized[0][0])))
print(Z_2)
ac_model = AC(n_clusters=14, linkage='average',
              compute_full_tree=False, affinity='cosine')
ac_labels = ac_model.fit_predict(Z_2)
img_labels_3 = ac_labels.reshape((resized.shape[0], resized.shape[1]))

plt.imshow(img_labels_3, cmap=plt.get_cmap('hot'))
plt.colorbar()
plt.show()
# cv2.imshow('res2', res2)
# cv2.waitKey(0)
# cv2.destroyAllWindows()