Example #1
import scipy.io
import sklearn.cluster
import matplotlib.pyplot as plt


def clust(elect_coords, n_clusts, iters, init_clusts):

    # Load resultant coordinates from Hough circles transform
    #coords = scipy.io.loadmat(elect_coords)
    #dat = coords.get('elect_coords')
    dat = elect_coords

    # Configure KMeans (parameters passed to the constructor rather than
    # set as attributes afterwards)
    cluster = sklearn.cluster.KMeans(n_clusters=n_clusts,
                                     init='k-means++',
                                     max_iter=iters,
                                     verbose=0,
                                     n_init=init_clusts)
    cluster.fit(dat)

    # Grab a vector for plotting each dimension
    x = list(cluster.cluster_centers_[:, 0])
    y = list(cluster.cluster_centers_[:, 1])
    z = list(cluster.cluster_centers_[:, 2])
    c = list(cluster.labels_)

    scipy.io.savemat('k_labels.mat', {'labels': cluster.labels_})
    scipy.io.savemat('k_coords.mat', {'coords': cluster.cluster_centers_})

    # Plot the k-means centroids in 3-D
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, s=64)
    plt.show()

    return cluster.cluster_centers_, cluster.labels_
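A minimal usage sketch for the function above, with random coordinates standing in for real Hough-transform output (the array shape and parameter values are illustrative only):

import numpy as np

# hypothetical stand-in for electrode coordinates: 100 points in 3-D
coords = np.random.rand(100, 3) * 10
centers, labels = clust(coords, n_clusts=5, iters=300, init_clusts=10)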
Example #3
def get_reduce_cluster_train(train_x, test_x, train_y, test_y, name, k):
    clusters = {'kmeans': get_kmeans(k), 'exmax': get_exmax(k)}
    reducers = {
        'pca': get_pca(),
        'ica': get_ica(),
        'randproj': get_randproj(),
        'kernel': get_kernel()
    }

    results = []
    for cluster_name, cluster in clusters.items():
        for reduction_name, reducer in reducers.items():
            one_hot = OneHotEncoder()

            # Train
            reduced_train = reducer.fit_transform(train_x)
            if cluster_name == 'exmax':
                cluster.fit(reduced_train)
                transformed_train = cluster.predict_proba(reduced_train)
            else:
                transformed_train = cluster.fit_predict(reduced_train)
                transformed_train = one_hot.fit_transform(
                    transformed_train.reshape(-1, 1)).toarray()

            nn = MLPClassifier(hidden_layer_sizes=[256] * 3,
                               learning_rate_init=1e-2,
                               early_stopping=True,
                               max_iter=10000)
            nn.fit(transformed_train, train_y)
            train_acc = nn.score(transformed_train, train_y)

            # Test
            reduced_test = reducer.transform(test_x)
            if cluster_name == 'exmax':
                transformed_test = cluster.predict_proba(reduced_test)
            else:
                transformed_test = cluster.predict(reduced_test)
                transformed_test = one_hot.transform(
                    transformed_test.reshape(-1, 1)).toarray()

            test_acc = nn.score(transformed_test, test_y)

            results.append({
                'name': f'{name}-{reduction_name}-{cluster_name}',
                'train_acc': train_acc,
                'test_acc': test_acc
            })

    df = pd.DataFrame.from_records(results,
                                   columns=['name', 'train_acc', 'test_acc'])
    print(df)
    df.to_csv(outputs_path / f'reduce-train-cluster-{name}.csv')
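The factory helpers (get_kmeans, get_exmax, get_pca, get_ica, get_randproj, get_kernel) are defined elsewhere and not shown. A plausible minimal sketch of them, assuming 'exmax' means expectation-maximization via a Gaussian mixture, and with illustrative component counts:

from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA, FastICA, KernelPCA
from sklearn.random_projection import GaussianRandomProjection


def get_kmeans(k):
    return KMeans(n_clusters=k)


def get_exmax(k):
    # GaussianMixture supports the predict_proba call used above
    return GaussianMixture(n_components=k)


def get_pca():
    return PCA(n_components=0.95)  # hypothetical: keep 95% of the variance


def get_ica():
    return FastICA(n_components=10)  # hypothetical component count


def get_randproj():
    return GaussianRandomProjection(n_components=10)  # hypothetical


def get_kernel():
    return KernelPCA(n_components=10, kernel='rbf')  # hypothetical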
Example #4
def exercise_1_b_1():
    # EXERCISES
    # Ex.1.b.1: Choosing the number of clusters in Agglomerative clustering
    # Please make sure that you have "shopping-data.csv" stored in the same folder as this notebook.
    # This file contains shopping data of customers. Suppose our task is to segment customers based on their shopping patterns.
    customer_data = pd.read_csv('shopping-data.csv')

    # Before we start, let's explore more about this dataset

    ## Shape of the dataset
    print(customer_data.shape)

    ## Print the first 5 data items
    print(customer_data.head())

    # Ex.1.b.1: Choosing the number of clusters in Agglomerative clustering (cont)
    # We suspect that the last two entries could be used for clustering
    # Extract the last 2 columns
    data = customer_data.iloc[:, 3:5].values

    # Use dendrogram to visualize hierarchical clustering for this dataset
    plt.figure(figsize=(10, 7))
    plt.title("Customer Dendograms")
    dend = shc.dendrogram(shc.linkage(data, method='ward'))

    # Ex.1.b.1: Choosing the number of clusters in Agglomerative clustering (cont)

    # Now, let's make use of the dendrogram to successfully apply Agglomerative clustering
    # QUESTION: Based on the dendrogram above, what would be the appropriate number of clusters?

    # Compute Agglomerative Clustering
    ### YOUR CODE HERE (Fill in the "None")
    # Hint: define an Agglomerative Clustering object
    cluster = AgglomerativeClustering(n_clusters=2,
                                      memory=None,
                                      connectivity=None,
                                      compute_full_tree='auto',
                                      linkage='ward')
    # Hint: fit the clustering object to our dataset so that "cluster" gains its fitted attributes (e.g. labels_).
    cluster.fit(data)
    ### END OF YOUR CODE

    # Visualization
    plt.figure(figsize=(10, 7))
    plt.scatter(data[:, 0], data[:, 1], c=cluster.labels_, cmap='rainbow')
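The exercise asks you to read the cluster count off the dendrogram by eye; to extract flat clusters from the same linkage programmatically, scipy's fcluster can cut the dendrogram at a distance threshold (the threshold value below is illustrative only):

    # cut the ward linkage at a hypothetical distance threshold
    Z = shc.linkage(data, method='ward')
    flat_labels = shc.fcluster(Z, t=200, criterion='distance')
    print(len(set(flat_labels)), "clusters at distance threshold 200")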
Example #5
    def kmeans(self):
        kmeans_params = {
            "n_clusters": self.n_clusters,
            "init": self.init_centers,
            "n_init": self.n_init,
            "max_iter": self.max_iter,
            "tol": self.tol,
            "precompute_distances": self.precompute_distances,
            "verbose": self.verbose,
            "random_state": self.random_state,
            "copy_x": self.copy_x,
            "n_jobs": self.n_jobs,
            "algorithm": self.algorithm
        }
        cluster = KMeans(**kmeans_params)
        cluster.fit(self.X)

        return cluster
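The class that hosts this method is not shown. A minimal hypothetical host, whose attribute names simply mirror the parameters forwarded above (precompute_distances and n_jobs reflect the pre-1.0 scikit-learn KMeans signature):

class KMeansWrapper:
    def __init__(self, X, n_clusters=8, init_centers='k-means++', n_init=10,
                 max_iter=300, tol=1e-4, precompute_distances='auto', verbose=0,
                 random_state=None, copy_x=True, n_jobs=None, algorithm='auto'):
        # store the data and every parameter that kmeans() forwards to KMeans
        self.X = X
        self.n_clusters = n_clusters
        self.init_centers = init_centers
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.precompute_distances = precompute_distances
        self.verbose = verbose
        self.random_state = random_state
        self.copy_x = copy_x
        self.n_jobs = n_jobs
        self.algorithm = algorithm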
Example #6
import sklearn.cluster
import sklearn.preprocessing
import pandas as pd
import numpy as np
from matplotlib import pyplot

data = np.empty((0, 21))
stander = sklearn.preprocessing.MaxAbsScaler()

for i in range(1, 2001):
    file = 'flow_per_shop/' + str(i) + '.csv'
    info = pd.read_csv(file)
    ts = info['count'].values
    ts = ts[-21:]
    # scalers expect 2-D input, so reshape the series to a single row
    ts = stander.fit_transform(ts.reshape(1, -1))
    data = np.vstack((data, ts))

print(data)

# use eight clusters so the plotting loop below covers every label
cluster = sklearn.cluster.KMeans(n_clusters=8)

af = cluster.fit(data)
# print(af.cluster_centers_)
labels = af.labels_
np.savetxt('labels.csv', labels, fmt='%d')

for i in range(8):
    indice = np.where(labels == i)[0]
    pyplot.figure()
    for item in indice[:200]:
        pyplot.plot(data[item, :])
    pyplot.show()
Example #7
def create_sanogram(elements_set, img, error_func, replace_color=None, n_colors=5):
    grid_size = elements_set.block_px
    # block coordinates
    h, w = img.shape[:2]
    blocks = []
    for iy in range(h // grid_size):
        for ix in range(w // grid_size):
            by, bx = iy*grid_size, ix*grid_size
            ey, ex = min(h, by+grid_size), min(w, bx+grid_size)
            if (ey-by < grid_size) or (ex-bx < grid_size): continue
            patch = img[by:ey, bx:ex]
            blocks.append((iy, ix, by, bx, ey, ex, patch))
    bh, bw = iy+1, ix+1
    # init labels unassigned.
    labels = np.ndarray((bh, bw), dtype=np.int32)
    labels[:, :] = -1
    # find best patches
    h, w = img.shape[:2]
    for iy, ix, by, bx, ey, ex, patch in blocks:
        errors = [error_func(patch, elem) for elem in elements_set.elements]
        min_idx = np.argmin(errors)
        labels[iy, ix] = min_idx
    # determine the new color
    if replace_color == 'direct':
        # use mean color of the target patch directly.
        color_map = np.ndarray((bh, bw, 3), dtype=img.dtype)
        for iy, ix, by, bx, ey, ex, patch in blocks:
            label = labels[iy, ix]
            if not elements_set.elements[label].is_background:
                mean_color = patch[elements_set.elements[label].shape].mean(axis=0)
                color_map[iy, ix] = mean_color
    elif replace_color == 'representative':
        # find <n_colors> representative colors from the input image and use the nearest one for each patch.
        colors = img.reshape((-1, 3))
        cluster = sklearn.cluster.KMeans(n_clusters=n_colors)
        cluster.fit(colors)
        # assign colors
        color_map = np.ndarray((bh, bw, 3), dtype=img.dtype)
        for iy, ix, by, bx, ey, ex, patch in blocks:
            label = labels[iy, ix]
            if not elements_set.elements[label].is_background:
                representative_index = cluster.predict(
                    patch[elements_set.elements[label].shape].mean(axis=0)
                    .reshape(1, -1))[0]
                color_map[iy, ix] = cluster.cluster_centers_[representative_index]
    elif replace_color is None:
        # color is associated to the patch shape according to elements_set.
        color_map = None
    else:
        color_map = None
        print('unknown replace_color=%s' % replace_color)

    # apply labels
    res_img = np.zeros_like(img) + COLOR_BG
    for iy, ix, by, bx, ey, ex, patch in blocks:
        label = labels[iy, ix]
        if label >= 0:
            if color_map is None:
                res_img[by:ey, bx:ex] = elements_set.elements[label].patch
            else:
                res_img[by:ey, bx:ex][elements_set.elements[label].shape] = color_map[iy, ix]
                res_img[by:ey, bx:ex][~elements_set.elements[label].shape] = elements_set.background_color
    return res_img
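error_func is supplied by the caller and is not shown. A plausible sketch (hypothetical, not from the original code), scoring a block against an element by mean squared error over the pixels covered by the element's shape mask, matching how .shape and .patch are used above:

import numpy as np

def mse_error(patch, elem):
    # compare only the pixels inside the element's boolean shape mask
    diff = (patch[elem.shape].astype(np.float64) -
            elem.patch[elem.shape].astype(np.float64))
    return float(np.mean(diff ** 2))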
Example #8
from sklearn.model_selection import train_test_split
import sklearn.cluster

# the opening of this snippet was truncated; an assumed split of a
# hypothetical feature matrix `features` is sketched here
features_train, features_test = train_test_split(features,
                                                 test_size=0.30,
                                                 random_state=0)

# %%
# fit the model
cluster = sklearn.cluster.KMeans(n_clusters=8,
                                 init='k-means++',
                                 n_init=10,
                                 max_iter=300,
                                 tol=0.0001,
                                 precompute_distances='auto',
                                 verbose=0,
                                 random_state=None,
                                 copy_x=True,
                                 n_jobs=1)
cluster.fit(features_train)

# %%
# Predict test features
result = cluster.predict(features_test)

# %%
result

# %%
# Perform a plot of the clusters
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# %%
# Use Principal Component Analysis (PCA) to reduce the dimensions
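The snippet ends before the PCA step it announces; a minimal sketch of a plausible continuation, projecting the test features to 2-D and colouring points by their predicted cluster:

# %%
pca = PCA(n_components=2).fit(features_train)
reduced = pca.transform(features_test)
plt.scatter(reduced[:, 0], reduced[:, 1], c=result)
plt.show()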
Example #9
import numpy as np
import pandas as pd
from sklearn import cluster

tr_aug_features = np.load('./data/voc12/features/train_aug_features.npy',
                          allow_pickle=True)
tr_features = np.load('./data/voc12/features/train_features.npy',
                      allow_pickle=True)
val_features = np.load('./data/voc12/features/val_features.npy',
                       allow_pickle=True)
features = (tr_aug_features.tolist() + tr_features.tolist() +
            val_features.tolist())

df = pd.DataFrame.from_records(features)
df.drop_duplicates('img_name', inplace=True)

kmeans = cluster.KMeans(n_clusters=20, n_jobs=-1)

df['feature'] = df['feature'].apply(lambda x: x[0].reshape(-1).tolist())
X = np.array(df['feature'].values.tolist())
kmeans = kmeans.fit(X)

label_d = dict()
category_size = len(set(kmeans.labels_))
for img_name, label in zip(df['img_name'].values, kmeans.labels_):
    cluster_label = np.zeros(category_size)
    cluster_label[label] = 1
    label_d[img_name] = cluster_label

np.save('./data/voc12/cls_kmeans_labels.npy', label_d)

with open('./data/voc12/category_size.txt', mode='a') as f:
    f.write('%s %s\n' % ('kmeans_id', category_size))
Example #10
import sklearn.cluster

pd_airports_notna = pd_airports.dropna()
n_clusters = 10
cluster = sklearn.cluster.KMeans(n_clusters=n_clusters,
                                 init='k-means++',
                                 n_init=10,
                                 max_iter=3000,
                                 tol=0.0001,
                                 precompute_distances='auto',
                                 verbose=0,
                                 random_state=None,
                                 copy_x=True,
                                 n_jobs=1)
cluster.fit(pd_airports_notna[["LONGITUDE", "LATITUDE"]])

cluster.cluster_centers_

from matplotlib.lines import Line2D

fig = plt.figure(figsize=(25, 20))
map = usa_map()
log, lat = pd_flights_short_ori_airport[
    'ORI_LONGITUDE'], pd_flights_short_ori_airport['ORI_LATITUDE']
log, lat = map(log, lat)
count_ori = pd_flights_short_ori_airport['COUNT_ORI_AIRPORT']
for x, y, c in zip(log, lat, count_ori):
    map.scatter(x, y, s=c / 1200, c='green')
cen_log, cen_lat = map(cluster.cluster_centers_[:, 0],
                       cluster.cluster_centers_[:, 1])
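The snippet imports Line2D and computes cen_log/cen_lat but ends before plotting them; a plausible completion (hypothetical):

map.scatter(cen_log, cen_lat, s=200, c='red', marker='x')
legend_handles = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor='green',
           label='airport (sized by traffic)'),
    Line2D([0], [0], marker='x', color='red', linestyle='None',
           label='cluster center'),
]
plt.legend(handles=legend_handles, fontsize=20)
plt.show()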
Example #11
import matplotlib.pyplot as plt
from sklearn import datasets
#Load Data
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Step 1 Model
from sklearn import cluster
kmeans = cluster.KMeans(n_clusters=2)
# Step 2 Training
kmeans.fit(X)
# Step 3 Evaluation
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_)
#Mean Shift Clustering
from sklearn.cluster import MeanShift
ms = MeanShift()
ms.fit(iris.data)
from sklearn.cluster import AgglomerativeClustering
groups = AgglomerativeClustering(n_clusters=2)
groups.fit_predict(iris.data)

from sklearn.decomposition import PCA

pca = PCA(n_components=2).fit(iris.data)
pca_2d = pca.transform(iris.data)
for i in range(0, pca_2d.shape[0]):
    if ms.labels_[i] == 1:
        c1 = plt.scatter(pca_2d[i, 0], pca_2d[i, 1], c='r', marker='+')
    elif ms.labels_[i] == 0:
        c2 = plt.scatter(pca_2d[i, 0], pca_2d[i, 1], c='g', marker='o')
#plt.title('Mean shift finds 2 clusters')
Example #12
        img_data = rbDetection(img.data)
        w, h = original_shape = tuple(img_data.shape)
        if concated_images is None:
            concated_images = np.reshape(img_data, (w * h, -1))
        else:
            concated_images = np.r_[concated_images,
                                    np.reshape(img_data, (w * h, -1))]

    start_time = time.time()
    # Check whether k-means has already been fitted:
    # if yes, update it with a partial fit; if not, do a full fit.
    try:
        cluster.labels_
        cluster.partial_fit(concated_images)
    except AttributeError:
        cluster.fit(concated_images)
    print(concated_images.shape[0], time.time() - start_time)

    # Get the labels (Cloud = 1, No cloud = 0)
    labels = Labels(cluster.labels_ * (-1) + 1)

    # Split the labels into the original image parts
    splitted_labels = labels.splitUp(indices_or_sections=len(image_list))
    for key, splitted_label in enumerate(splitted_labels):
        targets = None
        mini_images = None
        infos = None

        # Reshape the labels into the width and height of the image
        label = splitted_label.reshape((w, h), replace=False)
        start_time = time.time()
Example #13
for n in n_clusters:
    clust = cluster.KMeans(n_clusters=n).fit(data)
    pred = clust.predict(data)
    centers = clust.cluster_centers_
    score = silhouette_score(data, pred)
    print("The silhouette_score for {} clusters is {} ".format(n, score))
#The silhouette score measures cluster separation: it is highest when points
# are tightly grouped within their own cluster and far from the other clusters.

#################################
#KElbowVisualizer requires the yellowbrick package to be installed
model = cluster.KMeans()
from yellowbrick.cluster import KElbowVisualizer
kelb_graph = KElbowVisualizer(model, k=(1, 8))
kelb_graph.fit(data)
kelb_graph.poof()
##################################

clust_range = range(1, 10)
clust_err = []
for num_clust in clust_range:
    # use a fresh name so the `cluster` module is not rebound inside the loop
    km = cluster.KMeans(n_clusters=num_clust)
    km.fit(data)
    clust_err.append(km.inertia_)

cluster_df = pd.DataFrame({
    "Num_cluster": clust_range,
    "Cluster_err": clust_err
})
cluster_df[0:10]
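The elbow can be read more easily from a plot of the table above; a minimal sketch, assuming matplotlib:

import matplotlib.pyplot as plt

plt.plot(cluster_df["Num_cluster"], cluster_df["Cluster_err"], marker="o")
plt.xlabel("Number of clusters")
plt.ylabel("Within-cluster sum of squares (inertia)")
plt.show()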
Example #14
print "Max. no. of cluster : ", max_cluster
Initial_Label = []
max_choromosome_length = (max_cluster) * len(Idata[0])
print "Max. length of chromosome : ", max_choromosome_length
CH = input("enter No. of chromosome : ")
T = int(input("Enter no. of generation-  "))

K = []
for i in range(1, CH + 1):
    counter += 1
    pop = []
    n = randint(2, max_cluster)
    K.insert(i, n)
    print "no. of cluster : ", n
    cluster = KMeans(n_clusters=n)
    cluster.fit(Idata)
    label = cluster.predict(Idata)
    centers = cluster.cluster_centers_
    a = centers.tolist()
    for j in range(len(a)):
        for k in range(len(Idata[0])):
            pop.append(a[j][k])
    if max_chromosome_length - len(pop) != 0:
        extra_zero = max_chromosome_length - len(pop)
        pop.extend(0 for x in range(extra_zero))
    x.insert(i, pop)
    ss = silhouette_score(Idata, label)
    pbm = cal_pbm_index(n, Idata, centers, label)
    sil_sco.insert(i, ss)
    PBM.insert(i, pbm)
    Initial_Label.insert(i, label.tolist())