Example #1
def get_reduce_cluster_train(train_x, test_x, train_y, test_y, name, k):
    clusters = {'kmeans': get_kmeans(k), 'exmax': get_exmax(k)}
    reducers = {
        'pca': get_pca(),
        'ica': get_ica(),
        'randproj': get_randproj(),
        'kernel': get_kernel()
    }

    results = []
    for cluster_name, cluster in clusters.items():
        for reduction_name, reducer in reducers.items():
            one_hot = OneHotEncoder()

            # Train
            reduced_train = reducer.fit_transform(train_x)
            if cluster_name == 'exmax':
                cluster.fit(reduced_train)
                transformed_train = cluster.predict_proba(reduced_train)
            else:
                transformed_train = cluster.fit_predict(reduced_train)
                transformed_train = one_hot.fit_transform(
                    transformed_train.reshape(-1, 1)).toarray()

            nn = MLPClassifier(hidden_layer_sizes=[256] * 3,
                               learning_rate_init=1e-2,
                               early_stopping=True,
                               max_iter=10000)
            nn.fit(transformed_train, train_y)
            train_acc = nn.score(transformed_train, train_y)

            # Test
            reduced_test = reducer.transform(test_x)
            if cluster_name == 'exmax':
                transformed_test = cluster.predict_proba(reduced_test)
            else:
                transformed_test = cluster.predict(reduced_test)
                transformed_test = one_hot.transform(
                    transformed_test.reshape(-1, 1)).toarray()

            test_acc = nn.score(transformed_test, test_y)

            results.append({
                'name': f'{name}-{reduction_name}-{cluster_name}',
                'train_acc': train_acc,
                'test_acc': test_acc
            })

    df = pd.DataFrame.from_records(results,
                                   columns=['name', 'train_acc', 'test_acc'])
    print(df)
    df.to_csv(outputs_path / f'reduce-train-cluster-{name}.csv')
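The factory helpers used above are not shown in this example. Here is a minimal sketch of what they plausibly return, given that the 'exmax' branch calls predict_proba (which points to a Gaussian mixture) and every reducer needs fit_transform/transform; the specific hyperparameters are assumptions, not taken from the source:

from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA, FastICA, KernelPCA
from sklearn.random_projection import GaussianRandomProjection

def get_kmeans(k):
    return KMeans(n_clusters=k)

def get_exmax(k):
    # expectation-maximization clustering; predict_proba yields soft assignments
    return GaussianMixture(n_components=k)

def get_pca():
    return PCA(n_components=0.95)  # assumed: keep 95% of the variance

def get_ica():
    return FastICA(max_iter=1000)

def get_randproj():
    return GaussianRandomProjection(n_components=10)  # assumed dimensionality

def get_kernel():
    return KernelPCA(n_components=10, kernel='rbf')  # assumed kernel and size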
Example #2

    def createHistograms(self, dataset, descriptors, cluster):
        histograms = {}
        labels = []
        for index, image in sorted(dataset.images_paths.items()):
            # One histogram of cluster assignments per image
            histograms[index] = np.zeros(self.n_clusters)
            # Appending inside the same sorted loop keeps the label
            # order aligned with the histogram keys.
            labels.append(dataset.labels[index])
            if descriptors[index] is None:
                continue
            # Count how many of this image's descriptors fall in each cluster
            for descriptor in descriptors[index]:
                prediction = cluster.predict([descriptor])
                histograms[index][prediction] += 1

        return histograms, labels
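Calling cluster.predict once per descriptor is slow. An equivalent, vectorized loop body (a sketch with the same semantics) classifies all of an image's descriptors in one call and bins them with np.bincount:

            # replaces the inner per-descriptor loop above
            predictions = cluster.predict(descriptors[index])
            histograms[index] = np.bincount(
                predictions, minlength=self.n_clusters).astype(float)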
Example #3
def create_sanogram(elements_set, img, error_func, replace_color=None, n_colors=5):
    grid_size = elements_set.block_px
    # block coordinates
    h, w = img.shape[:2]
    blocks = []
    for iy in range(h // grid_size):
        for ix in range(w // grid_size):
            by, bx = iy*grid_size, ix*grid_size
            ey, ex = min(h, by+grid_size), min(w, bx+grid_size)
            if (ey-by < grid_size) or (ex-bx < grid_size): continue
            patch = img[by:ey, bx:ex]
            blocks.append((iy, ix, by, bx, ey, ex, patch))
    # number of whole blocks along each axis
    bh, bw = h // grid_size, w // grid_size
    # init labels unassigned.
    labels = np.full((bh, bw), -1, dtype=np.int32)
    # find best patches
    for iy, ix, by, bx, ey, ex, patch in blocks:
        errors = [error_func(patch, elem) for elem in elements_set.elements]
        min_idx = np.argmin(errors)
        labels[iy, ix] = min_idx
    # determine the new color
    if replace_color == 'direct':
        # use mean color of the target patch directly.
        color_map = np.ndarray((bh, bw, 3), dtype=img.dtype)
        for iy, ix, by, bx, ey, ex, patch in blocks:
            label = labels[iy, ix]
            if not elements_set.elements[label].is_background:
                mean_color = patch[elements_set.elements[label].shape].mean(axis=0)
                color_map[iy, ix] = mean_color
    elif replace_color == 'representative':
        # find <n_colors> representative colors from the input image and use the nearest one for each patch.
        colors = img.reshape((-1, 3))
        cluster = sklearn.cluster.KMeans(n_clusters=n_colors)
        cluster.fit(colors)
        # assign colors
        color_map = np.ndarray((bh, bw, 3), dtype=img.dtype)
        for iy, ix, by, bx, ey, ex, patch in blocks:
            label = labels[iy, ix]
            if not elements_set.elements[label].is_background:
                mean_color = patch[elements_set.elements[label].shape].mean(axis=0)
                representative_index = cluster.predict(mean_color.reshape(1, -1))[0]
                color_map[iy, ix] = cluster.cluster_centers_[representative_index]
    elif replace_color is None:
        # color is associated to the patch shape according to elements_set.
        color_map = None
    else:
        color_map = None
        print('unknown replace_color=%s' % replace_color)

    # apply labels
    res_img = np.zeros_like(img) + COLOR_BG
    for iy, ix, by, bx, ey, ex, patch in blocks:
        label = labels[iy, ix]
        if label >= 0:
            if color_map is None:
                res_img[by:ey, bx:ex] = elements_set.elements[label].patch
            else:
                res_img[by:ey, bx:ex][elements_set.elements[label].shape] = color_map[iy, ix]
                res_img[by:ey, bx:ex][~elements_set.elements[label].shape] = elements_set.background_color
    return res_img
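error_func is supplied by the caller and not shown; one plausible choice (an assumption, not taken from this source) is a plain mean squared error between the image patch and the element's patch:

import numpy as np

def mse_error(patch, elem):
    # per-pixel squared difference, averaged over the whole block
    diff = patch.astype(np.float64) - elem.patch.astype(np.float64)
    return float((diff ** 2).mean())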
# fit the model
cluster = sklearn.cluster.KMeans(n_clusters=8,
                                 init='k-means++',
                                 n_init=10,
                                 max_iter=300,
                                 tol=0.0001,
                                 verbose=0,
                                 random_state=None,
                                 copy_x=True)
cluster.fit(features_train)
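Apart from n_clusters, the arguments spelled out here match scikit-learn's usual defaults, so sklearn.cluster.KMeans(n_clusters=8) should behave the same (note that recent releases change the n_init default to 'auto').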

# %%
# Predict test features
result = cluster.predict(features_test)

# %%
result

# %%
# Perform a plot of the clusters
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# %%
# Use Principal Component Analysis (PCA) to reduce the dimensions
reduced_data = PCA(n_components=2).fit_transform(features_train)
kmeans = KMeans(init='k-means++', n_clusters=8, n_init=10)
kmeans.fit(reduced_data)
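The plot itself is missing from the snippet; a minimal sketch of the scatter it presumably builds, coloring each PCA-reduced point by its cluster label and marking the fitted centers (names follow the cells above):

# %%
labels = kmeans.predict(reduced_data)
plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=labels, s=10, cmap='tab10')
# mark the fitted cluster centers
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='black', marker='x', s=100)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.title('k-means clusters on PCA-reduced features')
plt.show()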
    label_counts = []
    for _ in tqdm(range(50)):
        label_counts.append(
            len(
                np.unique(sklearn.cluster.DBSCAN().fit_predict(
                    get_random_queryfeatures()))))
    print("median / mean num labels:", int(np.median(label_counts)),
          int(np.mean(label_counts)))
    avg_num_labels = int(np.median(label_counts))

    queryfeats = get_random_queryfeatures()

    train, test = train_test_split(queryfeats, test_size=0.25)
    cluster = sklearn.cluster.KMeans(n_clusters=avg_num_labels).fit(train)

    test_pred = cluster.predict(test)

    ks = [avg_num_labels]
    chs = [sklearn.metrics.calinski_harabasz_score(test, test_pred)]
    dbs = [sklearn.metrics.davies_bouldin_score(test, test_pred)]
    sil = [sklearn.metrics.silhouette_score(test, test_pred)]

    for n_clust in tqdm(range(10, 300, 10)):
        train, test = train_test_split(queryfeats, test_size=0.25)
        cluster = sklearn.cluster.KMeans(n_clusters=n_clust).fit(train)

        test_pred = cluster.predict(test)

        ks.append(n_clust)

        ch = sklearn.metrics.calinski_harabasz_score(test, test_pred)
        chs.append(ch)
        dbs.append(sklearn.metrics.davies_bouldin_score(test, test_pred))
        sil.append(sklearn.metrics.silhouette_score(test, test_pred))
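A short sketch of how the collected scores might be compared across cluster counts (matplotlib assumed; this is not part of the original snippet):

    import matplotlib.pyplot as plt

    # each metric has its own scale, so give each one its own axis
    fig, axes = plt.subplots(1, 3, figsize=(12, 3))
    titles = ['Calinski-Harabasz (higher is better)',
              'Davies-Bouldin (lower is better)',
              'Silhouette (higher is better)']
    for ax, scores, title in zip(axes, [chs, dbs, sil], titles):
        ax.plot(ks, scores)
        ax.set_xlabel('n_clusters')
        ax.set_title(title)
    plt.tight_layout()
    plt.show()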
Example #6
Initial_Label = []
max_chromosome_length = max_cluster * len(Idata[0])
print("Max. length of chromosome:", max_chromosome_length)
CH = int(input("Enter no. of chromosomes: "))
T = int(input("Enter no. of generations: "))

K = []
for i in range(1, CH + 1):
    counter += 1
    pop = []
    # pick a random cluster count for this chromosome
    n = randint(2, max_cluster)
    K.insert(i, n)
    print("No. of clusters:", n)
    cluster = KMeans(n_clusters=n)
    cluster.fit(Idata)
    label = cluster.predict(Idata)
    centers = cluster.cluster_centers_
    # flatten the cluster centers into the chromosome
    a = centers.tolist()
    for j in range(len(a)):
        for k in range(len(Idata[0])):
            pop.append(a[j][k])
    # pad with zeros up to the fixed chromosome length
    if max_chromosome_length - len(pop) != 0:
        extra_zero = max_chromosome_length - len(pop)
        pop.extend(0 for x in range(extra_zero))
    x.insert(i, pop)
    # record cluster-validity scores for this chromosome
    ss = silhouette_score(Idata, label)
    pbm = cal_pbm_index(n, Idata, centers, label)
    sil_sco.insert(i, ss)
    PBM.insert(i, pbm)
    Initial_Label.insert(i, label.tolist())
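cal_pbm_index is not defined in this excerpt. A plausible implementation of the PBM (Pakhira-Bandyopadhyay-Maulik) cluster-validity index it appears to compute, with the exact form an assumption:

import numpy as np

def cal_pbm_index(n_clusters, data, centers, labels):
    # PBM index: ((1/K) * (E1/EK) * DK) ** 2, higher is better
    data = np.asarray(data)
    centers = np.asarray(centers)
    # E1: total distance of every point to the global centroid
    e1 = np.linalg.norm(data - data.mean(axis=0), axis=1).sum()
    # EK: total distance of every point to its assigned center
    ek = np.linalg.norm(data - centers[labels], axis=1).sum()
    # DK: largest distance between any two centers
    dk = max(np.linalg.norm(ci - cj)
             for idx, ci in enumerate(centers)
             for cj in centers[idx + 1:])
    return ((1.0 / n_clusters) * (e1 / ek) * dk) ** 2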
Example #7
Initial_Label = []
max_chromosome_length = (max_cluster + 1) * len(Idata[0])
print("Max. length of chromosome:", max_chromosome_length)
CH = int(input("Enter no. of chromosomes: "))

K = []
for i in range(1, CH + 1):
    print("----------------------------------------")
    counter += 1
    pop = []
    n = randint(2, max_cluster)
    K.insert(i, n)
    print("No. of clusters:", n)
    # a single iteration from a random init gives rough, non-converged centers
    cluster = KMeans(n_clusters=n, init='random', max_iter=1)
    cluster.fit(Idata)
    label = cluster.predict(Idata)
    centers = cluster.cluster_centers_
    print("Centers:", centers)
    print("Labels:", label)
    print("No. of labels:", len(label))
    a = centers.tolist()

    # flatten the cluster centers into the chromosome
    for j in range(len(a)):
        for k in range(len(Idata[0])):
            pop.append(a[j][k])

    # pad with zeros up to the fixed chromosome length
    if max_chromosome_length - len(pop) != 0:
        extra_zero = max_chromosome_length - len(pop)
        pop.extend(0 for x in range(extra_zero))
    print("Center with appended zeros:", pop)
    print("Length of center:", len(pop))