def plot_clusters(data, predicted_clusters, initialized_kmeans, number_of_clusters):
    """Scatter-plot every predicted cluster together with the fitted centroids.

    Args:
        data: 2-D array; columns 0 and 1 are used as the x and y coordinates.
        predicted_clusters: Array of cluster ids aligned with the rows of
            ``data``; ids ``0 .. number_of_clusters - 1`` are drawn.
        initialized_kmeans: Fitted estimator exposing ``cluster_centers_``.
        number_of_clusters: Number of clusters to draw.
    """
    for cluster_id in range(number_of_clusters):
        members = predicted_clusters == cluster_id
        rgba = cm.nipy_spectral(float(cluster_id) / number_of_clusters)
        # ``color=`` instead of ``c=``: a bare RGBA tuple given as ``c`` is
        # indistinguishable from four values to be colour-mapped whenever the
        # cluster happens to contain exactly four points.
        plt.scatter(
            data[members, 0],
            data[members, 1],
            s=50,
            color=rgba,
            marker='o',
            edgecolor=rgba,
            label='cluster %d' % (cluster_id + 1),
        )

    # Centroids use the colour at the very top of the colormap.
    centroid_rgba = cm.nipy_spectral(float(number_of_clusters) / number_of_clusters)
    centroids = initialized_kmeans.cluster_centers_
    plt.scatter(
        centroids[:, 0],
        centroids[:, 1],
        s=250,
        marker='*',
        color=centroid_rgba,
        edgecolor='black',
        label='centroids',
    )
    plt.legend(scatterpoints=1)
    plt.grid()
    plt.tight_layout()
    plt.show()


# ============================================TESTING=======================================================
# path = "TelcoCustomerChurn.csv"
# df_telco = pd.read_csv(path)
# df_preprocessed = data_preprocessing(df_telco)
# columns_to_standardize = ['tenure', 'MonthlyCharges', 'TotalCharges']
# df_preprocessed = standardize_columns(df_preprocessed, True, columns_to_standardize)
def plot_silhouette_tsne(o_silhouette, X_transformed, o_stat_H, rank, prefix):
    """Draw a silhouette plot beside a 2-D embedding scatter and save it as PNG.

    Args:
        o_silhouette: Mapping with the keys ``'silhouette'`` (drawn as the
            mean line) and ``'silhouette_values'`` (per-sample values).
        X_transformed: 2-D array; columns 0 and 1 are plotted.
        o_stat_H: Mapping/frame whose ``'class0'`` entry holds one cluster
            label per sample.
        rank: Number of clusters.
        prefix: Output prefix; the figure goes to
            ``<prefix>.silhouette_umap.png``.
    """
    n_clusters = rank
    mean_score = o_silhouette['silhouette']
    per_sample = o_silhouette['silhouette_values']
    cluster_labels = o_stat_H['class0'].astype(float).astype(int)

    # One row, two columns: silhouettes on the left, embedding on the right.
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)
    fig.set_dpi(300)

    # Silhouette coefficients can range over [-1, 1]; only [-0.1, 1] is shown.
    ax1.set_xlim([-0.1, 1])
    # (n_clusters + 1) * 10 leaves blank space between the cluster bands.
    ax1.set_ylim([0, len(X_transformed) + (n_clusters + 1) * 10])

    band_start = 10
    for cluster in range(n_clusters):
        # Sorted silhouette values of the samples that belong to this cluster.
        band = per_sample[cluster_labels == cluster]
        band.sort()
        band_size = band.shape[0]
        band_end = band_start + band_size

        shade = cm.nipy_spectral(float(cluster) / n_clusters)
        ax1.fill_betweenx(
            np.arange(band_start, band_end),
            0,
            band,
            facecolor=shade,
            edgecolor=shade,
            alpha=0.7,
        )
        # Cluster number (1-based) at the middle of its band.
        ax1.text(-0.05, band_start + 0.5 * band_size, str(cluster + 1))
        # Next band starts 10 rows higher.
        band_start = band_end + 10

    ax1.set_title("The silhouette plot for the various clusters.")
    ax1.set_xlabel(" ".join(["The silhouette coefficient values (mean:", str(mean_score), ")"]))
    ax1.set_ylabel("Cluster label")
    # Vertical line at the average silhouette score.
    ax1.axvline(x=mean_score, color="red", linestyle="--")
    ax1.set_yticks([])  # no y tick labels
    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    # Right panel: the samples coloured by cluster.
    point_colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)
    ax2.scatter(
        X_transformed[:, 0],
        X_transformed[:, 1],
        marker='.',
        s=30,
        lw=0,
        alpha=0.7,
        c=point_colors,
        edgecolor='k',
    )
    ax2.set_title("The visualization of the clustered data.")
    ax2.set_xlabel("Feature space for the 1st tsne")
    ax2.set_ylabel("Feature space for the 2nd tsne")

    plt.suptitle(("Silhouette analysis for tSNE clustering on coefficient matrix H "
                  "with n_clusters = %d" % n_clusters),
                 fontsize=14, fontweight='bold')
    # fig.savefig('.'.join([prefix, "silhouette_tsne", "png"]))
    fig.savefig('.'.join([prefix, "silhouette_umap", "png"]))
def compare(ests, types, name, pres):
    """Overlay the correlation measurements of every (type, estimator) pair.

    Each data file is read with ``np.genfromtxt``; column 0 is used as x,
    column 1 as y and column 2 as the y error. The figure is written to
    ``comparisons/<name>.pdf``.

    Args:
        ests: Estimator identifiers; each is inserted into the file names.
        types: Sample names. ``'surhud'`` loads the RedMaPPer reference file,
            names containing ``"SDSS"`` load the Planck_SDSS files, anything
            else loads ``<type>/<type>_plot(<est>).dat``.
        name: Base name of the output PDF.
        pres: When equal to ``True`` every non-reference series is scaled by
            9, otherwise by 1.
    """
    n_series = int(len(ests) * len(types))
    colors = np.linspace(0, 1, n_series)
    if pres == True:  # noqa: E712 - exact comparison kept for compatibility
        # shifts=np.linspace(1,10+(n_series-1),n_series)
        shifts = [9] * n_series
        # shifts[0]+=10
    else:
        shifts = np.asarray([1] * n_series)

    fig = plt.figure(1)
    ax = plt.subplot(111)
    rule = "-----------------------------------------"
    i = 0  # index of the next colour/shift; advances only for plotted series
    for type_ in types:
        for est in ests:
            if type_ == 'surhud':
                dat = np.genfromtxt(
                    "/home/dominik.zuercher/Documents/RSP_Pro/Mest/redmap.dat")
            elif "SDSS" in type_:
                dat = np.genfromtxt(
                    "/work/dominik.zuercher/DataStore/corr-pairs/Planck_SDSS/Planck_SDSS_plot("
                    + str(est) + ").dat")
            else:
                try:
                    dat = np.genfromtxt(
                        "/work/dominik.zuercher/DataStore/corr-pairs/" + str(type_)
                        + "/" + str(type_) + "_plot(" + str(est) + ").dat")
                except (OSError, ValueError):
                    # Missing or malformed file: skip this combination. A bare
                    # ``except`` would also swallow KeyboardInterrupt.
                    continue

            print(rule)
            print(type_, est)
            print(dat)
            print(rule)

            if type_ == 'surhud':
                # The reference sample is always drawn at a fixed 0.1 scaling.
                ax.errorbar(dat[:, 0], np.multiply(dat[:, 1], 0.1),
                            np.multiply(dat[:, 2], 0.1), fmt=".",
                            c=cm.nipy_spectral(colors[i]), capsize=2, alpha=1,
                            label="RedMaPPer")
            else:
                ax.errorbar(dat[:, 0], np.multiply(dat[:, 1], shifts[i]),
                            np.multiply(dat[:, 2], shifts[i]), fmt=".",
                            capsize=2, c=cm.nipy_spectral(colors[i]), alpha=0.8,
                            label=str(type_) + " (" + str(est) + ")")
            i += 1

    ax.set_title("Comparison")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$R$ ($h^{-1}$Mpc)")
    ax.set_ylabel(r"$\xi^{\rm 2d}$ ($h^{-1}$Mpc)")
    plt.legend()
    plt.savefig("comparisons/" + str(name) + ".pdf")
    plt.close()
def silhouette_analysis(X, n_clusters, clusterer, set_lim, subtitle):
    """Show a silhouette plot next to the clustered samples and their centres.

    Args:
        X: 2-D sample array; columns 0 and 1 are plotted.
        n_clusters: Number of clusters.
        clusterer: Fitted estimator providing ``predict`` and
            ``cluster_centers_``.
        set_lim: y limits applied to the right-hand scatter axis.
        subtitle: Name inserted into the figure title.

    Returns:
        Tuple ``(cluster_labels, centers)``.
    """
    fig, (sil_ax, data_ax) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)

    # Silhouette coefficients span [-1, 1]; only [-0.1, 1] is displayed.
    sil_ax.set_xlim([-0.1, 1])
    # (n_clusters + 1) * 10 adds blank rows separating the cluster bands.
    sil_ax.set_ylim([0, len(X) + (n_clusters + 1) * 10])
    data_ax.set_ylim(set_lim)

    cluster_labels = clusterer.predict(X)
    # Average over all samples, then the per-sample scores.
    silhouette_avg = silhouette_score(X, cluster_labels)
    per_sample = silhouette_samples(X, cluster_labels)

    row = 10
    for cluster in range(n_clusters):
        band = per_sample[cluster_labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster)*1.3 / n_clusters)
        sil_ax.fill_betweenx(np.arange(row, row_end), 0, band,
                             facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        sil_ax.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band

    sil_ax.set_title("The silhouette plot for the various clusters.")
    sil_ax.set_xlabel("The silhouette coefficient values")
    sil_ax.set_ylabel("Cluster label")
    # Vertical line at the average silhouette score.
    sil_ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    sil_ax.set_yticks([])  # no y tick labels
    sil_ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    # Right panel: the samples coloured by cluster.
    point_colors = cm.nipy_spectral(cluster_labels.astype(float)*1.3 / n_clusters)
    data_ax.scatter(X[:, 0], X[:, 1], marker='.', s=130, lw=0, alpha=0.7,
                    c=point_colors)

    # White discs at the cluster centres, each overprinted with its index.
    centers = clusterer.cluster_centers_
    data_ax.scatter(centers[:, 0], centers[:, 1], marker='o', c="white",
                    alpha=1, s=200)
    for idx, center in enumerate(centers):
        data_ax.scatter(center[0], center[1], marker='$%d$' % idx, alpha=1, s=100)

    data_ax.set_title("The visualization of the clustered data.")
    data_ax.set_xlabel("Feature space for the 1st feature")
    data_ax.set_ylabel("Feature space for the 2nd feature")

    plt.suptitle("Silhouette analysis for %s clustering on sample data with n_clusters = %d and "
                 "silhouette_score = %s" % (subtitle, n_clusters, silhouette_avg))
    plt.show()
    return cluster_labels, centers
def write_result(og_img, img, prediction, index, epoch, resolution):
    """Write the prediction for one image to ``./results/<epoch>/<resolution>/``.

    Saved files: ``img_<index>.png``, ``og_img_<index>.png``,
    ``mask_<index>.png``, ``og_mask_<index>.png``, ``blended_<index>.png`` and
    ``og_img_<index>_cnts.png`` (original image with contours drawn on it).

    Args:
        og_img: Original image; used with ``.size``, ``.save`` and
            ``Image.blend`` (presumably a PIL image - TODO confirm).
        img: Network input; ``img.shape[1:]`` sizes the mask and
            ``img.mul(255).permute(1, 2, 0)`` converts it for saving
            (presumably a CHW torch tensor - TODO confirm).
        prediction: Either a sequence whose first item, or a mapping that
            itself, provides the ``"masks"`` and ``"labels"`` entries.
        index: Image index used in the output file names.
        epoch: Epoch used in the output directory name.
        resolution: Resolution used in the output directory name.
    """
    try:
        mask = np.zeros(img.shape[1:])
        # breakpoint()
        try:
            masks = prediction[0]["masks"]
            labels = prediction[0]["labels"].cpu()
        except Exception:
            # Fall back to a prediction that is not wrapped in a sequence.
            masks = prediction["masks"]
            labels = prediction["labels"].cpu()
        # Paint from the last mask towards the first so that lower indices
        # overwrite higher ones.
        # NOTE(review): the range stops before index 0, so the first mask is
        # never painted - confirm this is intended.
        for x in range(masks.shape[0] - 1, 0, -1):
            if len(masks.shape) == 4:
                tmp_mask = masks[x, 0].mul(255).byte().cpu().numpy()
            else:
                tmp_mask = masks[x].mul(255).byte().cpu().numpy()
            mask = np.where(tmp_mask > 0, labels[x].item(), mask)
        og_mask = DataSet.resize_mask(mask, og_img.size)
    except Exception:
        # NOTE(review): errors are swallowed here; if the failure happens
        # before ``og_mask`` is assigned, its use below raises NameError.
        pass
    # Colour-map the label masks.
    # NOTE(review): an all-zero mask makes ``mask.max()`` zero, so this
    # divides by zero - confirm how empty predictions should be handled.
    mask = Image.fromarray(np.uint8(cm.nipy_spectral(mask / mask.max()) * 255)).convert(
        "RGB"
    )
    og_mask = Image.fromarray(
        np.uint8(cm.nipy_spectral(og_mask / og_mask.max()) * 255)
    ).convert("RGB")
    filepath = Path(f"./results/{epoch}/{resolution}/")
    filepath.mkdir(parents=True, exist_ok=True)
    img = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
    img.save(filepath / f"img_{index}.png")
    og_img.save(filepath / f"og_img_{index}.png")
    mask.save(filepath / f"mask_{index}.png")
    og_mask.save(filepath / f"og_mask_{index}.png")
    blended = Image.blend(og_img, og_mask, 0.5)
    blended.save(filepath / f"blended_{index}.png")
    og_mask_np = np.asarray(og_mask).astype(np.uint8).transpose(2, 0, 1)
    og_img_np = np.asarray(og_img).astype(np.uint8)
    # breakpoint()
    # NOTE(review): ``og_mask_np`` holds the colour-mapped RGB image at this
    # point, so ``og_mask_np[0] == x`` compares red-channel values (not label
    # ids) against ``x`` - verify this is the intended grouping.
    for x in range(1, og_mask_np.max() + 1):
        submask = (og_mask_np[0] == x).astype(np.uint8)
        if submask.sum() == 0:
            continue
        contours, hierarchy = cv2.findContours(
            submask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE
        )
        # Base-3 digits of ``x`` select the contour colour.
        r = x % 3
        g = (x // 3) % 3
        b = (x // 3 // 3) % 3
        for contour in contours:
            cv2.drawContours(og_img_np, contour, -1, (r * 100, g * 100, b * 100), 5)
    # Channel order is reversed on the last axis before writing with OpenCV.
    cv2.imwrite(str(filepath / f"og_img_{index}_cnts.png"), og_img_np[:, :, ::-1])
def plot_silhouette(X, k, cluster_labels, centroids):
    """Show a silhouette plot beside a scatter of the samples and centroids.

    Args:
        X: 2-D sample array; columns 0 and 1 are plotted.
        k: Number of clusters.
        cluster_labels: Cluster id of every sample.
        centroids: 2-D array of cluster centres; columns 0 and 1 are plotted.
    """
    # Average score, then the score of every individual sample.
    silhouette_avg = silhouette_score(X, cluster_labels)
    per_sample = silhouette_samples(X, cluster_labels)

    fig, (sil_ax, data_ax) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)
    sil_ax.set_xlim([-0.1, 1])
    sil_ax.set_ylim([0, len(X) + (k + 1) * 10])

    row = 10
    for cluster in range(k):
        # Sorted scores of the samples assigned to this cluster.
        band = per_sample[cluster_labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster) / k)
        sil_ax.fill_betweenx(np.arange(row, row_end), 0, band,
                             facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        sil_ax.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band

    sil_ax.set_title("The silhouette plot for the various clusters.")
    sil_ax.set_xlabel("The silhouette coefficient values")
    sil_ax.set_ylabel("Cluster label")
    # Vertical line at the average silhouette score.
    sil_ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    sil_ax.set_yticks([])  # no y tick labels
    sil_ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    point_colors = cm.nipy_spectral(cluster_labels.astype(float) / k)
    data_ax.scatter(X[:, 0], X[:, 1], marker='o', s=100, alpha=0.5, c=point_colors)
    data_ax.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='k', s=200)
    data_ax.set_xlabel('$x_1$', fontsize=16)
    data_ax.set_ylabel('$x_2$', fontsize=16)
    plt.show()
def sigma_compare(ests, types, name):
    """Overlay the surface-density measurements of every (type, estimator) pair.

    Each data file is read with ``np.genfromtxt``; column 0 is used as x,
    column 1 as y and column 2 as the y error. The figure is written to
    ``sigmacomparisons/<name>.pdf``.

    Args:
        ests: Estimator identifiers; each is inserted into the file names.
        types: Sample names. ``'surhud'`` loads the RedMaPPer reference file,
            names containing ``"SDSS"`` load the Planck_SDSS files, anything
            else loads ``<type>/<type>_sigplot(<est>).dat``.
        name: Base name of the output PDF (also echoed to stdout).
    """
    colors = np.linspace(0, 1, int(len(ests) * len(types)))
    fig = plt.figure(2)
    ax = plt.subplot(111)
    rule = "-----------------------------------------"
    i = 0  # index of the next colour; advances only for plotted series
    for type_ in types:
        for est in ests:
            if type_ == 'surhud':
                dat = np.genfromtxt(
                    "/home/dominik.zuercher/Documents/RSP_Pro/Mest/redmap.dat")
            elif "SDSS" in type_:
                # NOTE(review): this branch reads the "_plot(" file while the
                # generic branch reads "_sigplot(" - confirm it is intended.
                dat = np.genfromtxt(
                    "/work/dominik.zuercher/DataStore/corr-pairs/Planck_SDSS/Planck_SDSS_plot("
                    + str(est) + ").dat")
            else:
                try:
                    dat = np.genfromtxt(
                        "/work/dominik.zuercher/DataStore/corr-pairs/" + str(type_)
                        + "/" + str(type_) + "_sigplot(" + str(est) + ").dat")
                except (OSError, ValueError):
                    # Missing or malformed file: skip this combination. A bare
                    # ``except`` would also swallow KeyboardInterrupt.
                    continue

            print(rule)
            print(name)
            print(dat)
            print(rule)

            if type_ == 'surhud':
                ax.errorbar(dat[:, 0], dat[:, 1], dat[:, 2], fmt=".",
                            c=cm.nipy_spectral(colors[i]), capsize=2, alpha=1,
                            label="RedMaPPer")
            else:
                ax.errorbar(dat[:, 0], dat[:, 1], dat[:, 2], fmt=".",
                            c=cm.nipy_spectral(colors[i]), capsize=2, alpha=0.8,
                            label=str(type_) + " (" + str(est) + ")")
            i += 1

    ax.set_title("Sigma Comparison")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$R$ ($h^{-1}$Mpc)")
    # The exponent must sit inside math mode, otherwise "^{-2}" is rendered
    # literally.
    ax.set_ylabel(r"$\Sigma_g$ ($h^{2}$Mpc$^{-2}$)")
    plt.legend()
    plt.savefig("sigmacomparisons/" + str(name) + ".pdf")
    plt.close()
def plot_score(data, labels, y_true, num_clusters=10):
    """Show a silhouette plot beside a t-SNE embedding of the clustered data.

    When all samples carry the same label no silhouette can be computed; a
    message is printed and nothing is shown.

    Args:
        data: 2-D sample array (``data.shape[0]`` samples).
        labels: Cluster id of every sample (array or list).
        y_true: Unused; kept for interface compatibility.
        num_clusters: Number of cluster ids (``0 .. num_clusters - 1``) drawn.
    """
    # Accept plain lists too: the boolean masks and ``astype`` below need an
    # ndarray.
    labels = np.asarray(labels)

    df_embedded = TSNE(n_components=2).fit_transform(data)
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)
    ax1.set_xlim([-0.1, 1])
    ax1.set_ylim([0, data.shape[0] + (num_clusters + 1) * 10])

    if len(np.unique(labels)) == 1:
        print("This time, no good.")
        return

    silhouette_avg = silhouette_score(data, labels)
    sample_silhouette_values = silhouette_samples(data, labels)

    y_lower = 10
    for i in range(num_clusters):
        ith_cluster_silhouette_values = sample_silhouette_values[labels == i]
        ith_cluster_silhouette_values.sort()
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = cm.nipy_spectral(float(i) / num_clusters)
        ax1.fill_betweenx(np.arange(y_lower, y_upper), 0,
                          ith_cluster_silhouette_values,
                          facecolor=color, edgecolor=color, alpha=0.7)
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
        y_lower = y_upper + 10  # gap before the next band

    ax1.set_title("The silhouette plot for the various clusters.")
    # The x axis shows coefficient values (the label used to repeat the title).
    ax1.set_xlabel("The silhouette coefficient values")
    ax1.set_ylabel("Cluster label.")
    ax1.axvline(x=silhouette_avg, color='red', linestyle='--')
    ax1.set_yticks([])
    # First tick matches the lower x limit; a tick at -0.2 would silently
    # widen the axis beyond the limits set above.
    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    colors = cm.nipy_spectral(labels.astype(float) / num_clusters)
    ax2.scatter(df_embedded[:, 0], df_embedded[:, 1], marker='.', s=60, lw=0,
                alpha=0.7, c=colors, edgecolor='k')
    ax2.set_title("The TSNE visualisation of the clustered data.")
    ax2.set_xlabel("Feature space for the 1st feature.")
    ax2.set_ylabel("Feature space for the 2nd feature.")

    # Note the trailing space: the two literals are concatenated.
    plt.suptitle(("Silhouette analysis for clustering on sampling data "
                  "with n_clusters = %d" % num_clusters),
                 fontsize=14, fontweight='bold')
    plt.show()
def __init__(self, world, name, parent="Originator", color=0, type="Human",
             movementspd=10, t0=0, growthrate=0.4, deathrate=1,
             renewabledepletion=0.01, nonrenewabledepletion=0.001,
             startRain=0.7):
    """Seed a population of one on a random land cell of ``world``.

    The start cell is drawn from cells above ``world.oLevel`` whose rainfall
    exceeds ``startRain``; if there is none, from any cell above
    ``world.oLevel``.

    Args:
        world: Object providing ``Elevation``, ``oLevel``, ``RainFall``,
            ``GlobeGrid``, ``gridindices``, ``OceanIndicator`` and
            ``InitialRenew``.
        name: Stored as ``self.Name``.
        parent: Stored as ``self.Parent``.
        color: A length-3 sequence is used as the base chromosome; anything
            else selects a random colour from the ``nipy_spectral`` colormap.
        type: Stored as ``self.Type``.
        movementspd: Stored as ``self.Movement``.
        t0: First entry of the ``"Time"`` history.
        growthrate: Growth rate of every non-ocean cell.
        deathrate: Death rate of every non-ocean cell.
        renewabledepletion: Renewable depletion rate of every non-ocean cell.
        nonrenewabledepletion: Non-renewable depletion rate of every
            non-ocean cell.
        startRain: Rainfall threshold for preferred start cells.
    """
    self.Name = name
    self.Type = type
    self.Parent = parent

    grid_shape = world.GlobeGrid[0].shape
    above_sea = world.Elevation > world.oLevel
    PossibleStarts = np.where(above_sea * (world.RainFall > startRain))
    if not len(PossibleStarts[0]):
        # No land cell is wet enough: fall back to any land cell.
        PossibleStarts = np.where(above_sea)
    pk = np.random.randint(len(PossibleStarts[0]))

    # Row-major flat index is row * n_columns + column. Multiplying by the
    # row count (shape[0]) is only right for square grids and can run out of
    # bounds otherwise.
    thept = PossibleStarts[0][pk] * grid_shape[1] + PossibleStarts[1][pk]

    InitialPop = np.zeros(len(world.gridindices))
    InitialPop[thept] = 1
    self.InitialDistribution = InitialPop.reshape(grid_shape)
    self.Population = self.InitialDistribution.copy()

    # Per-cell rates, zeroed over the ocean.
    grs = growthrate * np.ones(len(InitialPop))
    grs[world.OceanIndicator] = 0
    self.growthrates = grs.reshape(grid_shape)

    drs = deathrate * np.ones(len(InitialPop))
    drs[world.OceanIndicator] = 0
    self.deathrates = drs.reshape(grid_shape)

    n_resource_cells = len(world.InitialRenew.flatten())
    rds = renewabledepletion * np.ones(n_resource_cells)
    rds[world.OceanIndicator] = 0
    self.RenewDeplete = rds

    nrds = nonrenewabledepletion * np.ones(n_resource_cells)
    nrds[world.OceanIndicator] = 0
    self.NonRenewDeplete = nrds

    self.Movement = movementspd

    if hasattr(color, "__len__") and len(color) == 3:
        self.BaseChromosome = color
    else:
        # Random RGB triple taken from the colormap (alpha dropped).
        self.BaseChromosome = np.array(cm.nipy_spectral(np.random.rand())[:3])

    # Only the seeded cell carries the base chromosome.
    self.Chromosomes = self.BaseChromosome * self.InitialDistribution.astype(bool)[:, :, None]
    self.Cities = np.zeros_like(self.Population)
    self.History = {"Population": [self.InitialDistribution.copy()],
                    "Genetics": [self.Chromosomes.copy()],
                    "Cities": [self.Cities],
                    "Time": [t0]}
def output_plot(filename, models, numberized, x_min=-0.1, x_max=1.0, y_distance=10, x_step=0.2):
    """Save one silhouette plot per model, side by side, as ``<filename>.jpeg``.

    Args:
        filename: Output path without extension.
        models: Objects exposing ``number_of_clusters``, ``name`` and an
            ``instance`` with ``fit_predict``.
        numberized: 2-D sample array passed to every model.
        x_min: Lower x limit, within ``[-1, 1]``.
        x_max: Upper x limit, within ``[-1, 1]`` and not below ``x_min``.
        y_distance: Blank rows inserted between cluster bands (positive).
        x_step: Tick spacing on the x axis (positive, at most
            ``x_max - x_min``).

    Raises:
        ValueError: If the bounds, ``y_distance`` or ``x_step`` are invalid.
    """
    # Both bounds must lie in [-1, 1]. The previous check compared x_max
    # against +1 twice and therefore rejected every x_max other than 1.
    if not -1 <= x_min <= x_max <= 1:
        raise ValueError('Incorrect bounds for plotting silhouette score')
    if y_distance <= 0:
        raise ValueError('Incorrect y distance value')
    # A zero step cannot be handed to np.arange.
    if (x_step <= 0) or ((x_max - x_min) < x_step):
        raise ValueError('Incorrect x step value')

    # squeeze=False always yields a 2-D axes array, so one model works too.
    fig, axs = plt.subplots(1, len(models), squeeze=False)
    fig.set_size_inches(7 * len(models), 18)

    for ax1, model in zip(axs.ravel(), models):
        number_of_clusters = model.number_of_clusters
        predicted_labels = model.instance.fit_predict(numberized)
        silhouette_avg = silhouette_score(numberized, predicted_labels)
        silhouette_sample_values = silhouette_samples(numberized, predicted_labels)

        ax1.set_xlim([x_min, x_max])
        ax1.set_ylim(
            [0, numberized.shape[0] + (number_of_clusters + 1) * y_distance])

        y_lower = y_distance
        for j in range(number_of_clusters):
            ith_cluster_values = silhouette_sample_values[predicted_labels == j]
            ith_cluster_values.sort()
            ith_cluster_size = ith_cluster_values.shape[0]
            y_upper = y_lower + ith_cluster_size

            color = cm.nipy_spectral(float(j) / number_of_clusters)
            ax1.fill_betweenx(np.arange(y_lower, y_upper), 0, ith_cluster_values,
                              facecolor=color, edgecolor=color, alpha=0.7)
            ax1.text(-0.05, y_lower + 0.5 * ith_cluster_size, str(j))
            # Same gap as the one the y limit was sized for.
            y_lower = y_upper + y_distance

        ax1.set_title(f'The silhouette plot for {model.name} clustering')
        ax1.set_xlabel('The silhouette coefficient values')
        ax1.set_ylabel('The index of cluster')
        ax1.axvline(x=silhouette_avg, color='red', linestyle='--')
        ax1.set_yticks([])
        ax1.set_xticks(np.arange(x_min, x_max, x_step))

    plt.savefig(f'{filename}.jpeg', bbox_inches='tight')
def silhouette_plot(k, cluster_labels, sample_sil_coefficients, sil_score):
    """Save the silhouette plot of a ``k``-cluster solution as a PNG.

    Args:
        k: Number of clusters.
        cluster_labels: Cluster id of every sample.
        sample_sil_coefficients: Silhouette coefficient of every sample.
        sil_score: Value marked with a dashed vertical line.
    """
    plt.figure()
    row = 10
    for cluster in range(k):
        band = sample_sil_coefficients[cluster_labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster) / k)
        plt.fill_betweenx(np.arange(row, row_end), 0, band,
                          facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        plt.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band

    plt.xlabel("The silhouette coefficient values")
    plt.ylabel("Cluster label")
    plt.axvline(x=sil_score, color="red", linestyle="--")
    plt.savefig('exported/plots/Silhouette_graph_Origin_' + str(k) + '.png')
    plt.close()
def display():
    """Redraw the scene: sample points, their meshes and the connecting lines."""
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
    glMatrixMode(GL_MODELVIEW)
    glLoadIdentity()

    glyph = gvd(u'め')
    # glyph = gvd(u'を')
    # glyph = gvd(u'あ')
    samples = glyph.get_instance(5)

    for sample in samples:
        x, y = sample

        # The sample itself as a white point.
        glColor3f(1., 1., 1.)
        glBegin(GL_POINTS)
        glVertex3f(x, y, 0)
        glEnd()

        # Its mesh, coloured by the distance from the origin.
        radius = np.sqrt(x * x + y * y)
        glColor4f(*(cm.nipy_spectral(radius / 1500.)))
        mesh = glyph.get_mesh(sample)
        vertices = mesh['vertices']
        glBegin(GL_TRIANGLES)
        for triangle in mesh['triangles']:
            for vertex_index in triangle:
                glVertex3f(*(vertices[vertex_index]))
        glEnd()

    # Grey line segments between samples.
    segments = glyph.get_vis_net(samples)
    glColor4f(0.6, 0.6, 0.6, 0.8)
    glBegin(GL_LINES)
    for start, end in segments:
        glVertex2f(*start)
        glVertex2f(*end)
    glEnd()

    glutSwapBuffers()
def plot_clusters(X, y, centers=None, ax=None):
    """Scatter the samples coloured by cluster, optionally with the centres.

    Args:
        X: 2-D sample array; columns 0 and 1 are plotted.
        y: Array of cluster labels, one per sample.
        centers: Optional 2-D array of cluster centres.
        ax: Axes to draw on. When ``None`` the current pyplot figure is used
            with a simpler style.
    """
    n_labels = np.unique(y).shape[0]
    colors = cm.nipy_spectral(y.astype(float) / n_labels)

    if ax is None:
        plt.scatter(X[:, 0], X[:, 1], c=colors, s=10)
        if centers is not None:
            plt.scatter(centers[:, 0], centers[:, 1], c='red', marker='*')
        return

    ax.scatter(X[:, 0], X[:, 1], marker='.', lw=0, s=30, alpha=0.7,
               c=colors, edgecolor='k')
    if centers is None:
        return

    # White discs at the cluster centres, each overprinted with its index.
    ax.scatter(centers[:, 0], centers[:, 1], marker='o', c="white", alpha=1,
               s=200, edgecolor='k')
    for idx, center in enumerate(centers):
        ax.scatter(center[0], center[1], marker='$%d$' % idx, alpha=1, s=50,
                   edgecolor='k')
def plot_silhouette_values(n_cluster, cluster_labels, sample_silhouette_values, silhouette_avg, ax):
    """Draw the per-cluster silhouette bands on ``ax``.

    Args:
        n_cluster: Number of clusters.
        cluster_labels: Cluster id of every sample.
        sample_silhouette_values: Silhouette coefficient of every sample.
        silhouette_avg: Value marked with a dashed vertical line.
        ax: Axes to draw on.
    """
    row = 10
    for cluster in range(n_cluster):
        band = sample_silhouette_values[cluster_labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster) / n_cluster)
        ax.fill_betweenx(np.arange(row, row_end), 0, band,
                         facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        ax.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band

    ax.set_title("The silhouette plot for the various clusters.")
    ax.set_xlabel("The silhouette coefficient values")
    ax.set_ylabel("Cluster label")
    # Vertical line at the average silhouette score.
    ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    ax.set_yticks([])  # no y tick labels
    ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
def set_colors(self, n_clusters):
    """Assign colormap colours to this node and, recursively, its children.

    A node with a ``cluster_nb`` gets the colour at position
    ``(cluster_nb + 1) / (n_clusters + 1)`` of ``nipy_spectral``; nodes with
    children forward the call to both of them.
    """
    cluster = self.cluster_nb
    if cluster is not None:
        position = float(cluster + 1) / (n_clusters + 1)
        self.color = cm.nipy_spectral(position)

    if self.no_child:
        return
    for child in (self.first_child, self.second_child):
        child.set_colors(n_clusters)
def create_silgraph(df, labels):
    """Draw the per-cluster silhouette bands of ``labels`` on a new figure.

    Args:
        df: Samples handed to ``silhouette_samples``.
        labels: Cluster id of every sample; the number of distinct ids sets
            the number of bands, and ids ``0 .. n - 1`` are drawn.
    """
    per_sample = silhouette_samples(df, labels)
    n_clusters = len(np.unique(labels))

    fig = plt.figure()
    ax1 = fig.add_subplot(111)

    row = 100
    for cluster in range(n_clusters):
        band = per_sample[labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster) / n_clusters)
        ax1.fill_betweenx(np.arange(row, row_end), 0, band,
                          facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        ax1.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band
def PCA(X, label, variablesName, outputDir):
    """Plot the samples on their two largest principal components and save it.

    The data is reduced to two dimensions so it can be drawn; the figure is
    written to ``<outputDir>/result.png``.

    :param X: source data of the clustering (one sample per row)
    :param label: cluster label of every sample
    :param variablesName: text annotation for every sample
    :param outputDir: directory that receives ``result.png``
    """
    pca = decomposition.PCA(n_components=2)
    pca.fit(X)  # each row is one input sample
    result = pca.transform(X)  # projected coordinates

    plt.figure(figsize=[10, 6])  # new figure for the plot
    n_clusters = len(set(label.tolist()))
    for i in range(result[:, 0].size):
        color = cm.nipy_spectral(float(label[i]) / n_clusters)
        plt.plot(result[i, 0], result[i, 1], c=color, marker='o', markersize=10)
        plt.text(result[i, 0], result[i, 1], variablesName[i])

    explained = pca.explained_variance_ratio_
    x_label = 'PC1(%s%%)' % round((explained[0] * 100.0), 2)  # x axis label
    # The y axis shows the second component (it used to be labelled "PC1").
    y_label = 'PC2(%s%%)' % round((explained[1] * 100.0), 2)  # y axis label
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title('使用主成分分析法对高维数据进行降维,产生直观图像')

    # Report the current timestamp and save the scatter plot.
    tick = time.time()
    print("当前的时间戳为:", tick)
    pylab.savefig(outputDir + '/result.png')
def plotSilhouette(df, n_clusters, kmeans_labels, silhouette_avg):
    """Show the silhouette plot of a K-means solution.

    Args:
        df: Samples handed to ``silhouette_samples``.
        n_clusters: Number of clusters.
        kmeans_labels: Cluster id of every sample.
        silhouette_avg: Value marked with a dashed vertical line.
    """
    fig, ax1 = plt.subplots(1)
    fig.set_size_inches(8, 6)
    ax1.set_xlim([-0.2, 1])
    ax1.set_ylim([0, len(df) + (n_clusters + 1) * 10])

    # Vertical line at the average silhouette score.
    ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
    ax1.set_yticks([])  # no y tick labels
    ax1.set_xticks([-0.2, 0, 0.2, 0.4, 0.6, 0.8, 1])
    plt.title(("Análise de Silhouette para K = %d" % n_clusters),
              fontsize=10, fontweight='bold')

    row = 10
    # Silhouette score of every sample.
    per_sample = silhouette_samples(df, kmeans_labels)
    for cluster in range(n_clusters):
        band = per_sample[kmeans_labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster) / n_clusters)
        ax1.fill_betweenx(np.arange(row, row_end), 0, band,
                          facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        ax1.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band

    plt.show()
def calculate_silhouette(X, cluster_labels, n_clusters):
    """Print the average silhouette score and save the silhouette plot.

    The PNG is written to ``args.output`` using ``args.prefix`` and the
    cluster count in its file name (``args`` is read from module scope).

    Args:
        X: Samples handed to the silhouette functions.
        cluster_labels: Cluster id of every sample.
        n_clusters: Number of clusters.
    """
    silhouette_avg = silhouette_score(X, cluster_labels)
    print(f"For n_clusters = {n_clusters}, the average silhouette_score is : {silhouette_avg}")

    fig = plt.figure(figsize=(12, 8))
    ax = fig.gca()
    ax.set_xlim([-1, 1])
    # (n_clusters + 1) * 10 adds blank rows separating the cluster bands.
    ax.set_ylim([0, len(X) + (n_clusters + 1) * 10])

    # Silhouette score of every sample.
    per_sample = silhouette_samples(X, cluster_labels)

    row = 10
    for cluster in range(n_clusters):
        band = per_sample[cluster_labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster) / n_clusters)
        ax.fill_betweenx(np.arange(row, row_end), 0, band,
                         facecolor=shade, edgecolor=shade, alpha=0.7)
        ax.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band

    ax.set_title("The silhouette plot for the various clusters.")
    ax.set_xlabel("The silhouette coefficient values")
    ax.set_ylabel("Cluster label")
    ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    ax.set_yticks([])  # no y tick labels
    ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    plt.suptitle(("Silhouette analysis for KMeans clustering on sample data "
                  "with n_clusters = %d" % n_clusters),
                 fontsize=14, fontweight='bold')

    fname = os.path.join(
        args.output,
        args.prefix + "-kmeans-silhouette-" + str(n_clusters) + ".png",
    )
    plt.savefig(fname)
def plot_silhouette(X, cluster_labels):
    """Show the silhouette plot of a clustering on the current axes.

    Args:
        X: Samples handed to the silhouette functions.
        cluster_labels: Cluster id of every sample; the number of distinct
            ids sets the number of bands.
    """
    n_clusters = len(set(cluster_labels))
    # Average score, then the score of every individual sample.
    silhouette_avg = silhouette_score(X, cluster_labels)
    per_sample = silhouette_samples(X, cluster_labels)

    # fig, ax = plt.subplot()
    # fig = plt.figure()
    ax = plt.gca()

    row = 10
    for cluster in range(n_clusters):
        band = per_sample[cluster_labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster) / n_clusters)
        ax.fill_betweenx(np.arange(row, row_end), 0, band,
                         facecolor=shade, edgecolor=shade, alpha=0.7)
        ax.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band

    ax.set_title("The silhouette plot for the various clusters.")
    ax.set_xlabel("The silhouette coefficient values")
    ax.set_ylabel("Cluster label")
    ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    ax.set_yticks([])  # no y tick labels
    ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
    plt.show()
def evolplot(tracks, lim, save="", xlabel=r'$\hat{X}\ [1]$', ylabel=r'$\hat{P}\ [1]$'):
    """Scatter the trajectory of every particle in a square phase-space plot.

    Args:
        tracks: Array indexed as ``tracks[particle, step, coordinate]``;
            coordinates 0 and 1 are plotted.
        lim: Half-width of the plotted square (both axes span
            ``[-lim, lim]``).
        save: Output path. The figure is saved when the path's directory
            exists (a bare file name targets the current directory);
            otherwise the figure is shown.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
    """
    npart = tracks.shape[0]
    fig, ax = plt.subplots()
    ax.set_aspect(1)
    ax.set_xlim([-lim, lim])
    ax.set_ylim([-lim, lim])

    colors = cm.nipy_spectral(np.linspace(0, 1, npart))
    for part in range(npart):
        ax.scatter(tracks[part, :, 0], tracks[part, :, 1], color=colors[part])

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    fig.tight_layout()

    # ``os.path.dirname("plot.png")`` is "", which never "exists"; treat an
    # empty directory part as the current directory so bare file names are
    # saved instead of silently falling back to ``plt.show()``.
    if save and os.path.exists(os.path.dirname(save) or os.curdir):
        plt.savefig(save, bbox_inches='tight')
        plt.clf()
    else:
        plt.show()
def draw_pointed_cluster_map(df, attribute='checkins'):
    """Draws a cluster map highlighting important points.

    The importance of a point is its ``attribute`` value relative to the
    mean of that column; it scales the marker size, so an average point is
    drawn at size 30.

    Parameters
    ----------
    df : A pandas dataframe containing the latitude, longitude and cluster id
        data. The columns of the dataframe must have the names 'latitude',
        'longitude' and 'cluster_id'.

    attribute: str
        The attribute column name in the pandas dataframe that will be used
        to highlight points on the map. Rows with higher values of attribute
        are drawn larger.

    Returns
    -------
    fig: a figure object of the matplotlib module.
    """
    fig = plt.figure()

    # Marker sizes: 30 (the former fixed size) times the relative weight.
    # The weights used to be computed but never handed to ``scatter``.
    values = np.asarray(df[attribute], dtype=float)
    sizes = 30 * values / np.mean(values)
    if np.all(np.isfinite(sizes)):
        sizes = np.clip(sizes, 0, None)
    else:
        # Zero/NaN mean: fall back to uniform markers.
        sizes = 30

    X = df[['latitude', 'longitude']].values
    cluster_labels = np.array(df['cluster_id']).astype(float)
    n_clusters = len(Counter(cluster_labels).keys())
    colors = cm.nipy_spectral(cluster_labels / n_clusters)

    plt.scatter(X[:, 0], X[:, 1], marker='.', s=sizes, lw=0, alpha=0.7,
                c=colors, edgecolor='k')
    plt.xlabel('Latitude')
    plt.ylabel('Longitude')
    return fig
def plot_ground_tracks(sat_groups, obs_time=None):
    """
    Generate plot of ground tracks

    Args:
        sat_groups (dict): Dictionary containing keys for each satellite group
        obs_time (dt.datetime): Observer time (in UTC). Defaults to the
            current UTC time at the moment of the call.

    Returns:
        fig: matplotlib.figure.Figure: Figure handle
    """
    # A ``dt.datetime.utcnow()`` default in the signature is evaluated only
    # once, when the function is defined, and would freeze the time for every
    # later call.
    if obs_time is None:
        obs_time = dt.datetime.utcnow()

    # Initialize figure and map
    fig = plt.figure(figsize=(12, 10))

    # One colour per satellite group
    colors = cm.nipy_spectral(np.linspace(0, 1, len(sat_groups)))

    m = Basemap(projection='mill')  # Miller projection

    # Coastlines, background image and night shading
    m.drawcoastlines()
    m.bluemarble(scale=0.2, alpha=0.95, zorder=-1)
    m.nightshade(obs_time, alpha=0.5, zorder=0)

    # Plot satellites by group
    for ind_group, (sat_group_key, sat_group) in enumerate(sat_groups.items()):
        lats = [np.rad2deg(sat.sublat) for sat in sat_group]
        lons = [np.rad2deg(sat.sublong) for sat in sat_group]
        x, y = m(lons, lats)  # transform to map coordinates
        m.scatter(x, y, s=40, marker='+', color=colors[ind_group],
                  label=sat_group_key)

    fig.suptitle('Visible Satellites at {} (UTC)'.format(obs_time.strftime("%d %b %Y %H:%M:%S")))
    fig.legend()
    return fig
def graficarSilhouette(k, labels, sample_silhouette_values, silhouette_avg):
    """Draw the per-cluster silhouette bands on a new figure.

    Args:
        k: Number of clusters.
        labels: Cluster id of every sample.
        sample_silhouette_values: Silhouette coefficient of every sample.
        silhouette_avg: Value marked with a dashed vertical line.
    """
    fig, ax1 = plt.subplots(1, 1)

    row = 10
    for cluster in range(k):
        band = sample_silhouette_values[labels == cluster]
        band.sort()
        band_size = band.shape[0]
        row_end = row + band_size

        shade = cm.nipy_spectral(float(cluster) / k)
        ax1.fill_betweenx(np.arange(row, row_end), 0, band,
                          facecolor=shade, edgecolor=shade, alpha=0.7)
        # Cluster number at the middle of its band.
        ax1.text(-0.05, row + 0.5 * band_size, str(cluster))
        row = row_end + 10  # gap before the next band

    ax1.set_title("Plot del silhouette de cada cluster")
    ax1.set_xlabel("Coeficiente de silhouette")
    ax1.set_ylabel("Etiqueta del cluster")
    ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
    ax1.set_yticks([])
def PCA(X, label, cities, method, height):
    """Plot the samples on their two largest principal components.

    Components are kept until 95% of the variance is explained; the first
    two are drawn.

    Args:
        X: Source data, one sample per row.
        label: Cluster label of every sample.
        cities: Text annotation for every sample.
        method: Name shown in the plot title.
        height: Height value shown in the title and the printed summary.
    """
    # Keep the components explaining 95% of the variance.
    # NOTE(review): if a single component already reaches 95%, the second
    # column/ratio used below does not exist - confirm the inputs rule this
    # out.
    pca = decomposition.PCA(n_components=0.95)
    pca.fit(X)  # each row is one input sample
    result = pca.transform(X)  # projected coordinates

    plt.figure(figsize=[10, 6])  # new figure for the plot
    plt.rcParams['font.size'] = 14
    n_clusters = len(set(label.tolist()))
    print("When Height = %d, n_clusters = %d." % (height, n_clusters))

    for i in range(result[:, 0].size):
        color = cm.nipy_spectral(float(label[i]) / n_clusters)
        plt.plot(result[i, 0], result[i, 1], c=color, marker='o', markersize=10)
        plt.text(result[i, 0], result[i, 1], cities[i])

    explained = pca.explained_variance_ratio_
    x_label = 'PC1(%s%%)' % round((explained[0] * 100.0), 2)  # x axis label
    # The y axis shows the second component (it used to be labelled "PC1").
    y_label = 'PC2(%s%%)' % round((explained[1] * 100.0), 2)  # y axis label
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title('Height = %d (%s)' % (height, method))
    plt.show()
def plot_silhoutte_3d(X):
    """Cluster ``X`` with several cluster counts and show 3-D scatter plots.

    For each of 2, 3, 4, 5, 6 and 10 clusters the average silhouette score is
    printed and a figure with the samples and the cluster centres is created.

    Args:
        X: Sample array; columns 0, 1 and 2 are plotted.
    """
    for n_clusters in [2, 3, 4, 5, 6, 10]:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

        clusterer = KMeans_SK(n_clusters=n_clusters, random_state=10)
        cluster_labels = clusterer.fit_predict(X)
        silhouette_avg = silhouette_score(X, cluster_labels)
        print("For n_clusters =", n_clusters,
              "The average silhouette_score is :", silhouette_avg)

        point_colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)
        ax.scatter(X[:, 0], X[:, 1], X[:, 2], marker='.', s=30, lw=0,
                   alpha=0.7, c=point_colors, edgecolor='k')

        # Translucent discs slightly above the centres, each overprinted with
        # its index.
        centers = clusterer.cluster_centers_
        ax.scatter(centers[:, 0], centers[:, 1], centers[:, 2]+0.5,
                   marker='o', c="black", alpha=.2, s=200, edgecolor='k')
        for idx, center in enumerate(centers):
            ax.scatter(center[0], center[1], center[2]+0.5,
                       marker='$%d$' % idx, alpha=.5, s=50, edgecolor='k')

        ax.set_title("The visualization of the clustered data.")
        ax.set_xlabel("Feature space for the 1st feature")
        ax.set_ylabel("Feature space for the 2nd feature")
        plt.suptitle(("KMeans clustering on sample data "
                      "with n_clusters = %d" % n_clusters),
                     fontsize=14, fontweight='bold')

    plt.show()
def silhouettePlot(d, cluster_labels):
    """Show a silhouette plot for an existing clustering of ``d``.

    One horizontal band of sorted per-sample silhouette values is drawn for
    each cluster index in ``range(number of distinct labels)``; a dashed red
    line marks the mean silhouette score.

    Args:
        d: Data the clustering was computed on.
        cluster_labels: Cluster label of every sample in ``d``.
    """
    figure, axis = plt.subplots(1, 1)

    mean_score = silhouette_score(d, labels=cluster_labels,
                                  metric='euclidean', random_state=seed)
    per_sample = silhouette_samples(d, cluster_labels)
    nbr_clusters = len(set(cluster_labels))

    band_start = 10  # vertical gap kept below the first band
    for cluster in range(nbr_clusters):
        band = np.sort(per_sample[cluster_labels == cluster])
        band_size = band.shape[0]
        band_end = band_start + band_size

        shade = cm.nipy_spectral(float(cluster) / nbr_clusters)
        axis.fill_betweenx(np.arange(band_start, band_end), 0, band,
                           facecolor=shade, edgecolor=shade)
        # Cluster number sits at the vertical middle of its band.
        axis.text(-0.05, band_start + 0.5 * band_size, str(cluster))

        band_start = band_end + 10

    axis.set_title("Silhouette Plot for {} Clusters".format(nbr_clusters))
    axis.set_xlabel("Silhouette Coefficients")
    axis.set_ylabel("Cluster Label by Sample")
    axis.axvline(x=mean_score, color="red", linestyle="--")
    axis.set_yticks([])
    plt.show()
def silhouettes(name, X_train, max_clusters=15, min_clusters=5, save=False):
    '''
    Draw one silhouette plot per cluster count for the dataset ``name``.

    The training data is copied, standardized with ``split.standardize`` and
    reduced to its leading ``ds.get_number_numerical(name)`` columns
    (presumably the numerical features -- confirm against ``ds``). For every
    cluster count in ``[min_clusters, max_clusters]`` the labels predicted by
    ``kmeans(name, clusters, data)`` are turned into a silhouette plot and the
    mean silhouette score is printed.

    Args:
        name: Dataset identifier forwarded to ``ds``, ``split`` and ``kmeans``.
        X_train: Training data; it is copied and never modified.
        max_clusters: Largest cluster count to plot (inclusive).
        min_clusters: Smallest cluster count to plot (inclusive).
        save: When true, the figure of the largest cluster count is written
            to ``data/visualizations/<name>_elbow.png`` under the current
            working directory.
    '''
    X_train = X_train.copy()
    num_numerical = ds.get_number_numerical(name)
    X_train_s_numerical = split.standardize(
        name, X_train).iloc[:, 0:num_numerical]
    n_samples = len(X_train_s_numerical)

    fig = None  # stays None when the cluster range is empty
    for clusters in range(min_clusters, max_clusters + 1):
        fig, ax = plt.subplots()
        fig.set_size_inches(18, 7)

        # Silhouette coefficients lie in [-1, 1]; only [-0.7, 1] is shown.
        ax.set_xlim([-0.7, 1])
        # The (clusters + 1) * 10 leaves blank space between the bands of
        # the individual clusters, to demarcate them clearly.
        ax.set_ylim([0, n_samples + (clusters + 1) * 10])

        cluster_labels = kmeans(
            name, clusters, X_train_s_numerical).predict(X_train_s_numerical)

        silhouette_avg = silhouette_score(X_train_s_numerical, cluster_labels)
        print("For n_clusters =", clusters,
              "The average silhouette_score is :", silhouette_avg)

        cluster_silhouette = silhouette_samples(
            X_train_s_numerical, cluster_labels)

        y_lower = 10
        for i in range(clusters):
            ith_cluster_silhouette_values = np.sort(
                cluster_silhouette[cluster_labels == i])
            size_cluster_i = ith_cluster_silhouette_values.shape[0]
            y_upper = y_lower + size_cluster_i

            color = cm.nipy_spectral(float(i) / clusters)
            ax.fill_betweenx(np.arange(y_lower, y_upper),
                             0, ith_cluster_silhouette_values,
                             facecolor=color, edgecolor=color, alpha=0.7)

            # Label the band with its cluster number at the middle.
            ax.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

            # Next band starts 10 rows above this one.
            y_lower = y_upper + 10

        ax.set_title("The silhouette plot for the various clusters.")
        ax.set_xlabel("The silhouette coefficient values")
        ax.set_ylabel("Cluster label")

        # Vertical line for the average silhouette score of all samples.
        ax.axvline(x=silhouette_avg, color="red", linestyle="--")

        ax.set_yticks([])  # clear the y-axis labels / ticks
        ax.set_xticks(np.arange(-0.6, 1.1, 0.2))

    # Save before showing so the file is written from an intact figure, and
    # make sure the target directory exists.
    if save and fig is not None:
        to_save = Path().resolve().joinpath(
            'data', 'visualizations', '{}_elbow.png'.format(name))
        to_save.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(to_save)

    plt.show()
def plot_silhouette(data, cluster_labels, title=""):
    """Draw a silhouette plot for a clustering of ``data``.

    Each distinct label gets one band of sorted per-sample silhouette values,
    coloured from the ``nipy_spectral`` colormap; a dashed red line marks the
    mean silhouette score. The figure is created but neither shown nor
    returned.

    Args:
        data: Samples the clustering was computed on.
        cluster_labels: Cluster label of every sample (any array-like).
        title: Text appended to the plot title.
    """
    # An ndarray is needed so that ``labels == value`` yields a boolean mask
    # even when a plain list is passed in.
    labels = np.asarray(cluster_labels)

    silhouette_avg = silhouette_score(data, labels)
    silhouette_values = silhouette_samples(data, labels)

    fig, ax = plt.subplots(figsize=(10, 8))
    unique_labels = np.unique(labels)
    n_clusters = len(unique_labels)
    y_total = len(data) + (n_clusters + 1) * 10

    y_lower = 10
    for position, cluster in enumerate(unique_labels):
        cluster_silhouette_values = np.sort(
            silhouette_values[labels == cluster])
        size_cluster = cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster

        # Spread the clusters over the colormap's [0, 1) range; dividing by
        # 1 would map every label >= 1 onto the same saturated colour.
        color = cm.nipy_spectral(float(position) / n_clusters)
        ax.fill_betweenx(np.arange(y_lower, y_upper),
                         0, cluster_silhouette_values,
                         facecolor=color, edgecolor=color, alpha=0.7)
        ax.text(-0.05, y_lower + 0.5 * size_cluster, str(cluster))
        y_lower = y_upper + 10

    ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    ax.set_yticks([])
    ax.set_xlim((-1, 1))
    ax.set_ylim([0, y_total])
    # Hand-placed y-axis caption, vertically centred.
    ax.text(-0.12, y_total / 2., "Cluster label", rotation=90)
    ax.set_title(f"The silhouette plot {title}")
    ax.set_xlabel("The silhouette coefficient values")
def silhouette():
    """Plot the silhouette analysis of the final Stardust clustering.

    Reads ``Stardust_results/visualization_output/3_pass/data.csv``, computes
    silhouette values from its ``x``/``y`` columns and ``cluster`` labels,
    prints the mean score and writes ``silhouette.pdf`` and
    ``silhouette.png`` into ``Stardust_results/analysis``.

    Exits the process with a non-zero status when ``Stardust_results`` does
    not exist.
    """
    if not os.path.exists("Stardust_results"):
        print(
            "The directory structure Stardust_results does not exist. Please run run_stardust first"
        )
        # A missing input directory is an error: report it in the exit code.
        sys.exit(1)

    output_path = os.path.join("Stardust_results", "analysis")
    os.makedirs(output_path, exist_ok=True)

    from sklearn.metrics import silhouette_samples, silhouette_score
    import matplotlib.cm as cm

    data_df = pd.read_csv(
        'Stardust_results/visualization_output/3_pass/data.csv',
        delimiter=",",
        index_col=False)
    data_df.set_index('data', inplace=True)

    coordinates = data_df[['x', 'y']]
    labels = data_df['cluster']
    silhouette_avg = silhouette_score(coordinates, labels)
    sample_silhouette_values = silhouette_samples(coordinates, labels)
    print("silhouette score ", silhouette_avg)

    fig = plt.figure(figsize=(4, 7))
    n_clusters = len(labels.unique())
    y_lower = 10
    for i in range(n_clusters):
        # Silhouette values of the samples in cluster i, sorted ascending.
        ith_cluster_silhouette_values = np.sort(
            sample_silhouette_values[(labels == i).values])
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = cm.nipy_spectral(float(i) / n_clusters)
        plt.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          ith_cluster_silhouette_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=0.7)

        # Label the band with its cluster number at the middle.
        plt.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

        # Next band starts 10 rows above this one.
        y_lower = y_upper + 10

    plt.title("The silhouette plot for the various clusters.")
    plt.xlabel("silhouette coefficient", fontsize=20)
    plt.ylabel("Cluster label", fontsize=20)
    plt.axvline(x=silhouette_avg, color="red", linestyle="--")
    plt.yticks([])  # clear the y-axis labels / ticks
    plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
    sns.despine(bottom=False, left=False)

    for extension in ("pdf", "png"):
        fig.savefig(os.path.join(output_path, "silhouette." + extension),
                    bbox_inches='tight',
                    dpi=600)
def plotAstar(length, width, height, paths, gates):
    """Show the routed paths and the gates of a grid in a 3D plot.

    Args:
        length: Grid size along x; the x axis spans ``[0, length - 1]``.
        width: Grid size along y; the y axis spans ``[0, width - 1]``.
        height: Grid size along z; the z axis spans ``[0, height - 1]``.
        paths: Sequence of paths; ``path[0]``, ``path[1]`` and ``path[2]``
            hold the x, y and z coordinates of one path.
        gates: Objects exposing ``getX()``, ``getY()`` and ``getZ()``.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    colors = cm.nipy_spectral(np.linspace(0, 1, len(paths)))
    # A single random permutation hands every path its own colour in mixed
    # order. Unlike redrawing random indices until an unused one turns up,
    # it can neither step past the end of the palette nor loop forever.
    color_order = np.random.permutation(len(colors))

    for color_index, path in zip(color_order, paths):
        ax.plot(path[0], path[1], path[2],
                color=colors[color_index], zorder=-1)

    gate_x = [gate.getX() for gate in gates]
    gate_y = [gate.getY() for gate in gates]
    gate_z = [gate.getZ() for gate in gates]
    ax.scatter(gate_x, gate_y, gate_z,
               c='black', marker="s", s=60, zorder=10)

    ax.set_xlim3d([0, length - 1])
    ax.set_ylim3d([0, width - 1])
    ax.set_zlim3d([0, height - 1])
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')

    title = str(len(paths)) + " Random Connections"
    plt.title(title)

    plt.show(block=False)
def visualizeGrid(self, length, width, height, paths, gates):
    """Embed a 3D plot of the routed paths and gates in the Tk window.

    A new figure is wrapped in a ``FigureCanvasTkAgg`` stored as
    ``self.canvas2`` and packed on the right side of ``self.master``.

    Args:
        length: Grid size along x; the x axis runs from ``length - 1`` down
            to 0 (inverted).
        width: Grid size along y; the y axis spans ``[0, width - 1]``.
        height: Number of layers; the z axis spans ``[0, height - 1]``.
        paths: Sequence of paths; ``path[0]``, ``path[1]`` and ``path[2]``
            hold the x, y and z coordinates of one path.
        gates: Objects exposing ``getX()``, ``getY()`` and ``getZ()``.
    """
    f = plt.figure()

    # A Tk drawing area hosting the figure.
    self.canvas2 = FigureCanvasTkAgg(f, self.master)
    self.canvas2.get_tk_widget().pack(side=RIGHT, fill=BOTH, expand=1)

    ax = f.add_subplot(111, projection='3d')

    # Colour palette with one entry per path, handed out in random order.
    # A single permutation guarantees distinct colours and always ends,
    # whatever the bound convention of the random-integer helper in scope.
    colors = cm.nipy_spectral(np.linspace(0, 1, len(paths)))
    color_order = np.random.permutation(len(colors))

    for color_index, path in zip(color_order, paths):
        ax.plot(path[0], path[1], path[2],
                color=colors[color_index], zorder=-1)

    gate_x = [gate.getX() for gate in gates]
    gate_y = [gate.getY() for gate in gates]
    gate_z = [gate.getZ() for gate in gates]
    ax.scatter(gate_x, gate_y, gate_z,
               c='black', marker="s", s=60, zorder=10)

    ax.set_xlim3d([length - 1, 0])
    ax.set_ylim3d([0, width - 1])
    ax.set_zlim3d([0, height - 1])
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Layer')

    title = str(len(paths)) + " Random Connections"
    plt.title(title)

    # draw() replaces FigureCanvasTkAgg.show(), which newer Matplotlib no
    # longer provides; rendering last makes the canvas reflect the content.
    self.canvas2.draw()
def create_scatter(self, size=100, filename=None):
    '''
    Create a scatter plot of the clusters found.

    Rows of ``self.pos`` are drawn using their first two columns, grouped
    and coloured by the cluster index stored in ``self.k_fit``. Cluster
    indices are expected to be ``0 .. k-1``.

    Args:
        size: Marker size handed to ``plt.scatter``.
        filename: Where to save the figure (300 dpi). When ``None`` the
            figure is shown instead.
    '''
    assignments = numpy.asarray(self.k_fit)
    num_k = len(set(self.k_fit))  # number of kernels

    plt.figure(figsize=(15, 15))
    colors = cm.nipy_spectral(numpy.linspace(0, 1, num_k))

    for idx in range(num_k):
        members = assignments == idx
        # ``color=`` avoids an RGBA row being mistaken for per-point values.
        plt.scatter(self.pos[members, 0], self.pos[members, 1],
                    s=size, label=str(idx), color=colors[idx])
    plt.legend()

    if filename is None:
        plt.show()
    else:
        plt.savefig(filename, dpi=300)
    plt.close()
km = factory(n_clusters=n_clusters, init=init, random_state=run_id, n_init=n_init, **params).fit(X) inertia[i, run_id] = km.inertia_ p = plt.errorbar(n_init_range, inertia.mean(axis=1), inertia.std(axis=1)) plots.append(p[0]) legends.append("%s with %s init" % (factory.__name__, init)) plt.xlabel('n_init') plt.ylabel('inertia') plt.legend(plots, legends) plt.title("Mean inertia for various k-means init across %d runs" % n_runs) # Part 2: Qualitative visual inspection of the convergence X, y = make_data(random_state, n_samples_per_center, grid_size, scale) km = MiniBatchKMeans(n_clusters=n_clusters, init='random', n_init=1, random_state=random_state).fit(X) plt.figure() for k in range(n_clusters): my_members = km.labels_ == k color = cm.nipy_spectral(float(k) / n_clusters, 1) plt.plot(X[my_members, 0], X[my_members, 1], 'o', marker='.', c=color) cluster_center = km.cluster_centers_[k] plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=color, markeredgecolor='k', markersize=6) plt.title("Example cluster allocation with a single random init\n" "with MiniBatchKMeans") plt.show()
# Compute the silhouette scores for each sample sample_silhouette_values = silhouette_samples(X, cluster_labels) y_lower = 10 for i in range(n_clusters): # Aggregate the silhouette scores for samples belonging to # cluster i, and sort them ith_cluster_silhouette_values = \ sample_silhouette_values[cluster_labels == i] ith_cluster_silhouette_values.sort() size_cluster_i = ith_cluster_silhouette_values.shape[0] y_upper = y_lower + size_cluster_i color = cm.nipy_spectral(float(i) / n_clusters) ax1.fill_betweenx(np.arange(y_lower, y_upper), 0, ith_cluster_silhouette_values, facecolor=color, edgecolor=color, alpha=0.7) # Label the silhouette plots with their cluster numbers at the middle ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i)) # Compute the new y_lower for next plot y_lower = y_upper + 10 # 10 for the 0 samples ax1.set_title("The silhouette plot for the various clusters.") ax1.set_xlabel("The silhouette coefficient values") ax1.set_ylabel("Cluster label") # The vertical line for average silhouette score of all the values
print 'measDip_srt', measDip_srt dots = ['QD1','QD2','QD3','QD4','QD5','QD6','QD7','QD8','QD9'] my_xticks = dots x = na.arange(1, len(dots)+1, 1, dtype=na.int8) print 'x', x x_box_len = 5 y_box_len = 6 #y = measDip_srt[0] #yerr = measDip_srt[1] y = a[srt]*1e3 yerr = a_err[srt]*1e3 colors = mplcm.nipy_spectral(np.linspace(0, 1, len(dots))) for it in range(len(y)): ax.errorbar(x[it], y[it], yerr=yerr[it], fmt='o', color=colors[it], markeredgecolor=colors[it]) ax.axhline(np.mean(y), color='black', ls=':', lw=2) print 'mean', np.mean(y) ax.text(0.9, -0.47, r'$-$%.2f' %(np.abs(np.mean(y))), fontsize=12) #ax.set_ylim(-0.55,0) #ax1.set_ylim(-0.55,0) ax.set_ylim(-0.6,0) #ax1.set_ylim(-0.6,0) ax.set_xlim(0.8,9.2) #ax.set_ylabel(r'$\frac{1}{e}p^{\mathrm{static}}_{z}$') ax.set_ylabel(r'$A^{\mathrm{QD}}$ $\mathrm{(nm/GPa)}$', fontsize=15) ax.set_xlabel('$\mathrm{Measured}$ $\mathrm{QD}$ $\mathrm{no.}$', fontsize=15) ax.tick_params( labeltop=False, labelbottom=True, labelleft=True)
profileDict = {} # create dictionary to store diagraph instances for dia,time in zip(listdia,listtime): if len(dia)>1: if dia[0]+dia[1] in profileDict: # index dictionary at diagraph, check if it exists profileDict[dia[0]+dia[1]].append(time) # add time value to list else: # if the the index is not found profileDict[dia[0]+dia[1]] = [] # create a list there profileDict[dia[0]+dia[1]].append(time) # add time value to list profiles.append(profileDict) # add list to profile list commonDiagraphs = open('/home/andrew/Documents/Research/keystroke-authentication/keystroke goats/DIAGRAPHS_ETC.txt') diagraphsToTest = commonDiagraphs.read() allTestDia = diagraphsToTest.rstrip('\n').split(' ') #diagraphToTest = raw_input("Enter the diagraph to visualize: ") #numberOfUsers = raw_input("Enter the number of users you would like to test: ") # UNCOMMENT FOR LARGER SET testUserList = [] colormap = cm.nipy_spectral(np.linspace(0,.9,len(profiles))) for number,diagraphToTest in enumerate(allTestDia): plt.figure(number) if len(diagraphToTest) == 1: diagraphToTest = " " + diagraphToTest for numUsers,uname in enumerate(namelist): testUserList.append(numUsers)#raw_input("Enter user " + str(numUsers) + ": ")) #UNCOMMENT FOR LARGER SET timeToTest = [] # list to contain all times for a given diagraph for user timeToCompare = [] # list for times to compare to if diagraphToTest in profiles[numUsers]: for instance in profiles[numUsers][diagraphToTest]: timeToTest.append(instance) if len(timeToTest)>1: #kde = stats.kde.gaussian_kde(timeToTest) kde = sm.nonparametric.KDEUnivariate(timeToTest) # calculate density function for all times for given diagraph kde.fit(bw=nrd0(timeToTest))