def diversity_shell(S): frames = S.imagenames score_fn = S.vggmodel() features = np.zeros((len(frames), 4096), dtype=np.float32) for m, p in enumerate(frames): path = [] path.append(p) X = S.load_dataset(path) err = score_fn(X) features[m, :] = err[0] def square(list): return [i**2 for i in list] floatvec = lambda x: np.array([float(i) for i in x]) dist = lambda x, y: np.sqrt( np.sum( square( floatvec(x) / float(np.linalg.norm(x)) - floatvec(y) / float( np.linalg.norm(y))))) c = lambda x, y: dist(features[x, :], features[y, :]) b = lambda i, X: 5 if i == 0 else min( [c(X[i], X[j]) + 1e-4 for j in range(i)]) return (lambda X: (np.sum([b(i, X) for i in range(len(X))])))
def diagonality_unbalanced(ir_dft, ir_dftb):
    """Pairwise diagonality scores between two sets of spectra.

    For every pair (a, b) an unbalanced Sinkhorn transport plan P is
    computed on a squared-difference cost grid, and the Pearson
    correlation of P's (row, column) coordinates weighted by P's mass is
    used as a "how diagonal is P" measure, following
    https://math.stackexchange.com/questions/1392491/measure-of-how-much-diagonal-a-matrix-is

    :param ir_dft: sequence of 1-D arrays (rows of the result).
    :param ir_dftb: sequence of 1-D arrays (columns of the result).
    :return: array of shape (len(ir_dft), len(ir_dftb)) of correlations.
    """
    grid = np.linspace(0, 1, ir_dft[0].size)
    yy, xx = np.meshgrid(grid, grid)
    cost = abs(yy - xx) ** 2

    def diagonality(P):
        # Weighted Pearson correlation of the coordinates under mass P.
        ones = np.ones(P.shape[0])
        coords = np.arange(P.shape[0])
        total = ones @ P @ ones.T
        mx = coords @ P @ ones.T
        my = ones @ P @ coords.T
        mx2 = coords ** 2 @ P @ ones.T
        my2 = ones @ P @ coords ** 2
        mxy = coords @ P @ coords.T
        cov = total * mxy - mx * my
        sx = np.sqrt(total * mx2 - mx ** 2)
        sy = np.sqrt(total * my2 - my ** 2)
        return cov / (sx * sy)

    scores = np.zeros((len(ir_dft), len(ir_dftb)))
    for row, a in enumerate(ir_dft):
        for col, b in enumerate(ir_dftb):
            # P = sink.sinkhorn(a,b, 0.003).P
            plan = ot.unbalanced.sinkhorn_unbalanced(a, b, cost, 0.004, 10**2)
            scores[row, col] = diagonality(plan)
    return scores
def dissimilarityMatrix(dataset, dist):
    """Return the n x n dissimilarity matrix D for *dataset*.

    D[i][j] = dist(dataset[i], dataset[j]); no symmetry or zero diagonal
    is assumed -- whatever *dist* returns is stored as-is.

    :param dataset: sequence of n instances.
    :param dist: callable(a, b) -> dissimilarity between two instances.
    :return: numpy array of shape (n, n).
    """
    # Fix: the original used Python-2 print statements, which are a syntax
    # error under Python 3 and inconsistent with the rest of this file.
    print("Calculating Dissimilarity Matrix...")
    mat = np.array([[dist(i, j) for j in dataset] for i in dataset])
    print("...Done!")
    return mat
def arbitrary_distance_matrix(A, B, dist):
    """
    Return the len(A) x len(B) matrix of pairwise distances
    (by definition a distance must be symmetrical).

    :param A: first collection of objects
    :param B: second collection of objects
    :param dist: function calculating distance of two objects
    :return: numpy array with entry [i, j] == dist(A[i], B[j])
    """
    out = np.zeros([len(A), len(B)])
    for i, a in enumerate(A):
        for j, b in enumerate(B):
            out[i, j] = dist(a, b)
    return out
def spectral():
    """Run spectral clustering on a sample of the `credit` data.

    Plots ground truth vs. predicted clusters side by side, prints binary
    precision/recall/F1, and returns the predicted labels.
    """
    warnings.filterwarnings('ignore')
    points, truth = sample_vecs(credit, ratio=3)
    pairwise = dist(points, points)
    # NOTE(review): with affinity='nearest_neighbors' sklearn treats its
    # input as raw feature vectors, so fitting on a distance matrix looks
    # suspicious -- 'precomputed' may have been intended; confirm.
    model = SpectralClustering(n_clusters=2, affinity='nearest_neighbors', random_state=0)
    predicted = model.fit(pairwise).labels_
    fig, (ax_truth, ax_pred) = plt.subplots(1, 2)
    scatter(points[:, 0], points[:, 1], ax=ax_truth, hue=truth)
    scatter(points[:, 0], points[:, 1], ax=ax_pred, hue=predicted)
    # NOTE(review): sklearn's convention is (y_true, y_pred); here the
    # prediction is passed first -- verify the intended argument order.
    precision, recall, f1, _ = precision_recall_fscore_support(predicted, truth, average="binary")
    print("precision:", precision)
    print("recall:", recall)
    print("f1:", f1)
    plt.show()
    return predicted
def p_values():
    """Fit five clustering models (k-means, fuzzy c-means, GMM, spectral,
    DBSCAN) on separately sampled subsets of `credit` and print the result
    of `randomize(labels, ground_truth)` for each.

    NOTE(review): each method gets its own sampling ratio -- presumably
    tuned per method; confirm against the sampling experiments.
    """
    # One independent sample per method, each with its own ratio.
    vecs_kmeans, gt_kmeans = sample_vecs(credit,ratio = 100)
    vecs_fcm, gt_fcm = sample_vecs(credit,ratio = 7)
    vecs_gmm, gt_gmm = sample_vecs(credit,ratio = 2)
    vecs_spectral, gt_spectral = sample_vecs(credit,ratio = 3)
    vecs_dbscan, gt_dbscan = sample_vecs(credit,ratio = 1)
    # --- k-means ---
    kmeans = KMeans(n_clusters=2, random_state = 0).fit(vecs_kmeans)
    kmeans_labels = kmeans.labels_
    # Flip labels so that cluster 1 is never the majority cluster.
    if sum(kmeans_labels) > len(kmeans_labels)/2:
        kmeans_labels = 1 - kmeans_labels
    kmeans_centers = kmeans.cluster_centers_  # only used by the commented-out plots
    # scatter(vecs[:,13], vecs[:,16], ax=axes[0], hue=kmeans_labels)
    # scatter(kmeans_centers[:,14], kmeans_centers[:,17], ax=axes[0],marker="s",s=100)
    # --- fuzzy c-means ---
    # NOTE(review): this assumes FCM.fit returns the fitted model; some
    # versions of the fuzzy-c-means package return None from fit() -- confirm.
    fcm = FCM(n_clusters=2, m=1.1).fit(vecs_fcm)
    fcm_centers = fcm.centers  # only used by the commented-out plots
    # Threshold the membership matrix at 0.6 instead of a hard argmax.
    fcm_labels = cutoff(fcm.u,0.6)
    #fcm_labels = fcm.u.argmax(axis = 1)
    if sum(fcm_labels) > len(fcm_labels)/2:
        fcm_labels = 1 - np.array(fcm_labels)
    # print('fcm_centers:\n',fcm_centers)
    # print('fcm_labels:\n',fcm_labels)
    # scatter(vecs[:,13], vecs[:,16], ax=axes[1], hue=fcm_labels)
    # scatter(fcm_centers[:,14], fcm_centers[:,17], ax=axes[1],marker="s",s=100)
    # --- Gaussian mixture ---
    gmm = GaussianMixture(n_components=2, random_state = 0).fit(vecs_gmm)
    gmm_labels = gmm.predict(vecs_gmm)
    if sum(gmm_labels) > len(gmm_labels)/2:
        gmm_labels = 1 - gmm_labels
    # --- spectral ---
    warnings.filterwarnings('ignore')
    # NOTE(review): fit() is given a distance matrix while
    # affinity='nearest_neighbors' expects raw feature vectors -- verify.
    spectral_labels = SpectralClustering(n_clusters=2, affinity='nearest_neighbors', random_state=0).fit(dist(vecs_spectral,vecs_spectral)).labels_
    # spec_labels = spec.fit(distances).labels_
    # spectral_labels = spectral()
    # --- DBSCAN ---
    # NOTE(review): 11 and 4.4 are presumably min_samples/eps for
    # dbscan_func -- confirm against its definition.
    db_labels = dbscan_func(vecs_dbscan,11,4.4)
    # Report each method against its own ground truth.
    print("kmeans")
    print(randomize(kmeans_labels,gt_kmeans))
    print("fcm")
    print(randomize(fcm_labels,gt_fcm))
    print("gmm")
    print(randomize(gmm_labels,gt_gmm))
    print("spectral")
    print(randomize(spectral_labels, gt_spectral))
    print("dbscan")
    print(randomize(db_labels,gt_dbscan))
plt.scatter(x1[:, 0], y1[:], marker='o') plt.grid(axis='y', alpha=0.75) plt.xlabel('X') plt.ylabel('Y') plt.title('Distribution of the randomly generated data') #%% Fitting the Hierarchical clustering using the complete linkage approach # >> Complete linkage minimizes the maximum distance between all observations from sklearn.cluster import AgglomerativeClustering cplt_linkage_fit = AgglomerativeClustering(n_clusters=4, affinity="euclidean", linkage="complete") cplt_linkage_fit.fit(x1, y1) # Dendogram from scipy.spatial import distance_matrix as dist from scipy.cluster import hierarchy as h dist_matrix = dist(x1, x1) z = h.linkage(y=dist_matrix, method"complete", metric="euclidean") cplt_linkage_fit_dendogram = h.dendrogram(z) # Plotting clusters plt.figure(figsize=(6,4)) plt.title('Clustered Data Distribution - Complete Linkage Approach') plt.grid(axis='y', alpha=0.75) plt.grid(axis='x', alpha=0.75) plt.xlabel('X') plt.ylabel('Y') x_min, x_max = np.min(x1, axis=0), np.max(x1, axis=0) x1 = (x1 - x_min) / (x_max - x_min) for i in range(x1.shape[0]): plt.text(x1[i, 0], x1[i, 1], str(y1[i]),