import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder

# Assumes get_kmeans, get_exmax, get_pca, get_ica, get_randproj, get_kernel
# and outputs_path (a pathlib.Path) are defined elsewhere in the module.


def get_reduce_cluster_train(train_x, test_x, train_y, test_y, name, k):
    clusters = {'kmeans': get_kmeans(k), 'exmax': get_exmax(k)}
    reducers = {
        'pca': get_pca(),
        'ica': get_ica(),
        'randproj': get_randproj(),
        'kernel': get_kernel()
    }
    results = []
    for cluster_name, cluster in clusters.items():
        for reduction_name, reducer in reducers.items():
            one_hot = OneHotEncoder()

            # Train: reduce dimensionality, then cluster the reduced data.
            reduced_train = reducer.fit_transform(train_x)
            if cluster_name == 'exmax':
                # EM gives soft assignments; use the probabilities directly.
                cluster.fit(reduced_train)
                transformed_train = cluster.predict_proba(reduced_train)
            else:
                # Hard labels are one-hot encoded into a feature matrix.
                transformed_train = cluster.fit_predict(reduced_train)
                transformed_train = one_hot.fit_transform(
                    transformed_train.reshape(-1, 1)).todense()
            nn = MLPClassifier(hidden_layer_sizes=[256] * 3,
                               learning_rate_init=1e-2,
                               early_stopping=True,
                               max_iter=10000)
            nn.fit(transformed_train, train_y)
            train_acc = nn.score(transformed_train, train_y)

            # Test: apply the already-fitted reducer, cluster and encoder.
            reduced_test = reducer.transform(test_x)
            if cluster_name == 'exmax':
                transformed_test = cluster.predict_proba(reduced_test)
            else:
                transformed_test = cluster.predict(reduced_test)
                transformed_test = one_hot.transform(
                    transformed_test.reshape(-1, 1)).todense()
            test_acc = nn.score(transformed_test, test_y)
            results.append({
                'name': f'{name}-{reduction_name}-{cluster_name}',
                'train_acc': train_acc,
                'test_acc': test_acc
            })
    df = pd.DataFrame.from_records(
        results, columns=['name', 'train_acc', 'test_acc'])
    print(df)
    df.to_csv(outputs_path / f'reduce-train-cluster-{name}.csv')
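
# A minimal usage sketch, not part of the original module: the dataset,
# split and k below are illustrative, and it presumes the get_* factories
# wrap scikit-learn estimators (e.g. get_exmax(k) -> GaussianMixture(k)).
from pathlib import Path

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

outputs_path = Path('outputs')
outputs_path.mkdir(exist_ok=True)

X, y = load_digits(return_X_y=True)
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.25)
get_reduce_cluster_train(train_x, test_x, train_y, test_y, 'digits', k=10)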
def createHistograms(self, dataset, descriptors, cluster):
    histograms = {}
    labels = []
    for index, image in sorted(dataset.images_paths.items()):
        # For each descriptor of the current image
        histograms[index] = np.zeros(self.n_clusters)
        # Hopefully the labels order will be the same as the images
        # order... Or will it...
        labels.append(dataset.labels[index])
        if descriptors[index] is None:
            continue
        for descriptor in descriptors[index]:
            prediction = cluster.predict([descriptor])
            histograms[index][prediction] += 1
    return (histograms, labels)
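
# A sketch of the bag-of-visual-words setup this method expects; `bow` (an
# instance of the containing class), `dataset` and `descriptors` are
# assumptions here, shaped as the method uses them. Note the method predicts
# one descriptor per call; cluster.predict(descriptors[index]) would batch
# all descriptors of an image in a single call and is much faster.
import numpy as np
from sklearn.cluster import KMeans

all_descriptors = np.vstack(
    [d for d in descriptors.values() if d is not None])
vocabulary = KMeans(n_clusters=bow.n_clusters).fit(all_descriptors)
histograms, labels = bow.createHistograms(dataset, descriptors, vocabulary)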
# Assumes numpy (as np) and sklearn.cluster are imported and COLOR_BG is
# defined at module level.
def create_sanogram(elements_set, img, error_func, replace_color=None, n_colors=5):
    grid_size = elements_set.block_px

    # Block coordinates (integer division; partial edge blocks are skipped).
    h, w = img.shape[:2]
    blocks = []
    for iy in range(h // grid_size):
        for ix in range(w // grid_size):
            by, bx = iy * grid_size, ix * grid_size
            ey, ex = min(h, by + grid_size), min(w, bx + grid_size)
            if (ey - by < grid_size) or (ex - bx < grid_size):
                continue
            patch = img[by:ey, bx:ex]
            blocks.append((iy, ix, by, bx, ey, ex, patch))
    bh, bw = h // grid_size, w // grid_size

    # Init labels unassigned.
    labels = np.full((bh, bw), -1, dtype=np.int32)

    # Find the best-matching element for each block.
    for iy, ix, by, bx, ey, ex, patch in blocks:
        errors = [error_func(patch, elem) for elem in elements_set.elements]
        labels[iy, ix] = np.argmin(errors)

    # Determine the new color.
    if replace_color == 'direct':
        # Use the mean color of the target patch directly.
        color_map = np.ndarray((bh, bw, 3), dtype=img.dtype)
        for iy, ix, by, bx, ey, ex, patch in blocks:
            label = labels[iy, ix]
            if not elements_set.elements[label].is_background:
                mean_color = patch[elements_set.elements[label].shape].mean(axis=0)
                color_map[iy, ix] = mean_color
    elif replace_color == 'representative':
        # Find <n_colors> representative colors from the input image and use
        # the nearest one for each patch.
        colors = img.reshape((-1, 3))
        cluster = sklearn.cluster.KMeans(n_clusters=n_colors)
        cluster.fit(colors)
        # Assign colors (predict expects a 2-D array, hence the reshape).
        color_map = np.ndarray((bh, bw, 3), dtype=img.dtype)
        for iy, ix, by, bx, ey, ex, patch in blocks:
            label = labels[iy, ix]
            if not elements_set.elements[label].is_background:
                mean_color = patch[elements_set.elements[label].shape].mean(axis=0)
                representative_index = cluster.predict(mean_color.reshape(1, -1))[0]
                color_map[iy, ix] = cluster.cluster_centers_[representative_index]
    elif replace_color is None:
        # Color is associated with the patch shape according to elements_set.
        color_map = None
    else:
        color_map = None
        print('unknown replace_color=%s' % replace_color)

    # Apply labels.
    res_img = np.zeros_like(img) + COLOR_BG
    for iy, ix, by, bx, ey, ex, patch in blocks:
        label = labels[iy, ix]
        if label >= 0:
            if color_map is None:
                res_img[by:ey, bx:ex] = elements_set.elements[label].patch
            else:
                shape = elements_set.elements[label].shape
                res_img[by:ey, bx:ex][shape] = color_map[iy, ix]
                res_img[by:ey, bx:ex][~shape] = elements_set.background_color
    return res_img
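
# error_func is left abstract above. A minimal sketch of one plausible
# implementation (an assumption, not the original metric), presuming each
# element exposes a `patch` image the same size as a block:
import numpy as np

def mse_error(patch, elem):
    # Hypothetical block-matching metric: mean squared difference between
    # the image patch and the element's patch over all pixels.
    diff = patch.astype(np.float64) - elem.patch.astype(np.float64)
    return float((diff ** 2).mean())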
# fit the model
# (precompute_distances and n_jobs were removed from KMeans in
# scikit-learn 1.0; the remaining arguments are the defaults spelled out.)
cluster = sklearn.cluster.KMeans(n_clusters=8, init='k-means++', n_init=10,
                                 max_iter=300, tol=0.0001, verbose=0,
                                 random_state=None, copy_x=True)
cluster.fit(features_train)
# %%
# Predict test features
result = cluster.predict(features_test)
# %%
result
# %%
# Perform a plot of the clusters
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# %%
# Use Principal Component Analysis (PCA) to reduce the data to two dimensions
reduced_data = PCA(n_components=2).fit_transform(features_train)
kmeans = KMeans(init='k-means++', n_clusters=8, n_init=10)
kmeans.fit(reduced_data)
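# %%
# The plotting cell itself is missing above; a minimal sketch (the styling
# choices are assumptions): scatter the PCA-reduced points colored by
# cluster and mark the centroids.
labels = kmeans.labels_
plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=labels, s=8, cmap='tab10')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            marker='x', s=100, c='black')
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.title('K-means clusters in PCA space')
plt.show()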
import numpy as np
import sklearn.cluster
import sklearn.metrics
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Assumes get_random_queryfeatures() is defined elsewhere and returns a
# 2-D feature array.

# Estimate a cluster count by running DBSCAN on random feature samples.
label_counts = []
for _ in tqdm(range(50)):
    label_counts.append(
        len(
            np.unique(sklearn.cluster.DBSCAN().fit_predict(
                get_random_queryfeatures()))))
print("median / mean num labels:",
      int(np.median(label_counts)), int(np.mean(label_counts)))
avg_num_labels = int(np.median(label_counts))
# 127, 130, 128, 210

# Score k-means at the DBSCAN-derived k, then sweep a range of k values.
queryfeats = get_random_queryfeatures()
train, test = train_test_split(queryfeats, test_size=0.25)
cluster = sklearn.cluster.KMeans(n_clusters=avg_num_labels).fit(train)
test_pred = cluster.predict(test)
ks = [avg_num_labels]
chs = [sklearn.metrics.calinski_harabasz_score(test, test_pred)]
dbs = [sklearn.metrics.davies_bouldin_score(test, test_pred)]
sil = [sklearn.metrics.silhouette_score(test, test_pred)]
for n_clust in tqdm(range(10, 300, 10)):
    train, test = train_test_split(queryfeats, test_size=0.25)
    cluster = sklearn.cluster.KMeans(n_clusters=n_clust).fit(train)
    test_pred = cluster.predict(test)
    ks.append(n_clust)
    chs.append(sklearn.metrics.calinski_harabasz_score(test, test_pred))
    dbs.append(sklearn.metrics.davies_bouldin_score(test, test_pred))
    sil.append(sklearn.metrics.silhouette_score(test, test_pred))
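
# A sketch of how the collected scores might be compared (the plotting code
# is an assumption). Calinski-Harabasz and silhouette are better when higher,
# Davies-Bouldin when lower; ks is sorted first since avg_num_labels was
# inserted out of order.
import matplotlib.pyplot as plt

order = np.argsort(ks)
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for ax, scores, title in zip(
        axes, (chs, dbs, sil),
        ('Calinski-Harabasz (higher is better)',
         'Davies-Bouldin (lower is better)',
         'Silhouette (higher is better)')):
    ax.plot(np.asarray(ks)[order], np.asarray(scores)[order], marker='o')
    ax.set_xlabel('n_clusters')
    ax.set_title(title)
plt.tight_layout()
plt.show()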
# Assumes Idata (the data matrix), max_cluster, counter, x (the population
# list), sil_sco, PBM, randint, KMeans, silhouette_score and cal_pbm_index()
# are defined or imported earlier in the script.
Initial_Label = []
max_chromosome_length = max_cluster * len(Idata[0])
print("Max. length of chromosome : ", max_chromosome_length)
CH = int(input("enter No. of chromosome : "))
T = int(input("Enter no. of generation- "))
K = []
for i in range(1, CH + 1):
    counter += 1
    pop = []
    n = randint(2, max_cluster)
    K.insert(i, n)
    print("no. of cluster : ", n)
    cluster = KMeans(n_clusters=n)
    cluster.fit(Idata)
    label = cluster.predict(Idata)
    centers = cluster.cluster_centers_
    a = centers.tolist()
    # Flatten the cluster centers into one chromosome.
    for j in range(len(a)):
        for k in range(len(Idata[0])):
            pop.append(a[j][k])
    # Pad with zeros up to the fixed chromosome length.
    if max_chromosome_length - len(pop) != 0:
        extra_zero = max_chromosome_length - len(pop)
        pop.extend(0 for _ in range(extra_zero))
    x.insert(i, pop)
    ss = silhouette_score(Idata, label)
    pbm = cal_pbm_index(n, Idata, centers, label)
    sil_sco.insert(i, ss)
    PBM.insert(i, pbm)
    Initial_Label.insert(i, label.tolist())
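
# cal_pbm_index is not shown in this snippet. A sketch of the PBM (I)
# cluster-validity index of Pakhira, Bandyopadhyay and Maulik, which the
# helper presumably computes (an assumption, using Euclidean distances):
import numpy as np

def cal_pbm_index(n_clusters, data, centers, labels):
    # PBM index: ((1/K) * (E1/Ek) * Dk)^2 -- higher is better.
    data = np.asarray(data, dtype=float)
    grand_centroid = data.mean(axis=0)
    # E1: total distance of all points to the grand centroid.
    e1 = np.linalg.norm(data - grand_centroid, axis=1).sum()
    # Ek: total distance of each point to its assigned cluster center.
    ek = np.linalg.norm(data - centers[labels], axis=1).sum()
    # Dk: largest distance between any two cluster centers.
    dk = max(np.linalg.norm(ci - cj)
             for i, ci in enumerate(centers)
             for cj in centers[i + 1:])
    return ((e1 / ek) * dk / n_clusters) ** 2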
# Same initialization as above, but with verbose output and a single
# k-means iteration from a random init. Assumes Idata, max_cluster,
# counter, randint and KMeans are defined or imported earlier.
Initial_Label = []
max_chromosome_length = (max_cluster + 1) * len(Idata[0])
print("Max. length of chromosome : ", max_chromosome_length)
CH = int(input("enter No. of chromosome : "))
K = []
for i in range(1, CH + 1):
    print("----------------------------------------")
    counter += 1
    pop = []
    n = randint(2, max_cluster)
    K.insert(i, n)
    print("no. of cluster : ", n)
    cluster = KMeans(n_clusters=n, init='random', max_iter=1)
    cluster.fit(Idata)
    label = cluster.predict(Idata)
    centers = cluster.cluster_centers_
    print("center is : ", centers)
    print("labels : ", label)
    print("no. of labels :", len(label))
    a = centers.tolist()
    # Flatten the cluster centers into one chromosome.
    for j in range(len(a)):
        for k in range(len(Idata[0])):
            pop.append(a[j][k])
    # Pad with zeros up to the fixed chromosome length.
    if max_chromosome_length - len(pop) != 0:
        extra_zero = max_chromosome_length - len(pop)
        pop.extend(0 for _ in range(extra_zero))
    print("center with appended zero : ", pop)
    print("length of center : ", len(pop))