def diversity_shell(S):
    """Build a diversity scoring function over S's image frames.

    Extracts a 4096-d feature vector for every image in ``S.imagenames``
    using S's VGG model, then returns a callable that scores an ordered
    selection of frame indices ``X``: the first pick contributes a fixed 5,
    and every later pick contributes its smallest Euclidean distance between
    L2-normalised features (plus a 1e-4 floor) to the picks before it.

    :param S: provider exposing ``imagenames``, ``vggmodel()`` and
        ``load_dataset(paths)`` (assumed project API -- TODO confirm).
    :return: function mapping a sequence of frame indices to a float score.
    """
    frames = S.imagenames
    score_fn = S.vggmodel()
    features = np.zeros((len(frames), 4096), dtype=np.float32)
    for m, frame in enumerate(frames):
        X = S.load_dataset([frame])
        features[m, :] = score_fn(X)[0]

    def pair_dist(i, j):
        # Euclidean distance between the L2-normalised feature vectors;
        # replaces the original hand-rolled square/sum/sqrt lambdas.
        a = features[i, :].astype(float)
        b = features[j, :].astype(float)
        return float(np.linalg.norm(a / np.linalg.norm(a) - b / np.linalg.norm(b)))

    def marginal(i, X):
        # First element gets a fixed reward; later ones their minimum
        # distance to earlier picks (1e-4 keeps duplicates from adding zero).
        if i == 0:
            return 5
        return min(pair_dist(X[i], X[j]) + 1e-4 for j in range(i))

    return lambda X: np.sum([marginal(i, X) for i in range(len(X))])
# Example #2
def diagonality_unbalanced(ir_dft, ir_dftb):
    """Pairwise diagonality scores between two collections of spectra.

    For each pair (a, b) an unbalanced Sinkhorn transport plan P is computed,
    and its "diagonality" is measured as the Pearson correlation between row
    and column coordinates weighted by P's entries (see
    https://math.stackexchange.com/questions/1392491/measure-of-how-much-diagonal-a-matrix-is).
    """
    # Quadratic ground cost on the unit square.
    grid = np.linspace(0, 1, ir_dft[0].size)
    Y, X = np.meshgrid(grid, grid)
    C = abs(Y - X)**2

    def diagonality(P):
        # Pearson correlation of (row index, column index) weighted by P.
        ones = np.ones(P.shape[0])
        idx = np.arange(P.shape[0])
        idx_sq = idx**2

        total = ones @ P @ ones.T
        sx = idx @ P @ ones.T
        sy = ones @ P @ idx.T
        sx2 = idx_sq @ P @ ones.T
        sy2 = ones @ P @ idx_sq.T
        sxy = idx @ P @ idx.T

        num = total * sxy - sx * sy
        den = np.sqrt(total * sx2 - sx**2) * np.sqrt(total * sy2 - sy**2)
        return num / den

    scores = np.zeros((len(ir_dft), len(ir_dftb)))
    for row, a in enumerate(ir_dft):
        for col, b in enumerate(ir_dftb):
            plan = ot.unbalanced.sinkhorn_unbalanced(a, b, C, 0.004, 10**2)
            scores[row, col] = diagonality(plan)

    return scores
# Example #3
def dissimilarityMatrix(dataset, dist):
    """Return the n x n dissimilarity matrix D for a dataset of n instances.

    D[i, j] = dist(dataset[i], dataset[j]); the result is symmetric whenever
    ``dist`` itself is symmetric.

    :param dataset: iterable of n instances.
    :param dist: callable giving the dissimilarity of two instances.
    :return: numpy array of shape (n, n).
    """
    # BUG FIX: original used Python 2 `print` statements, a SyntaxError on
    # Python 3; also replaced the manual append loops with a comprehension.
    print("Calculating Dissimilarity Matrix...")
    mat = np.array([[dist(i, j) for j in dataset] for i in dataset])
    print("...Done!")
    return mat
def arbitrary_distance_matrix(A, B, dist):
    """Build a len(A) x len(B) matrix of pairwise distances.

    :param A: first collection of objects
    :param B: second collection of objects
    :param dist: function calculating the distance of two objects
        (by definition the distance must be symmetrical)
    :return: numpy array with entry [i, j] = dist(A[i], B[j])
    """
    out = np.zeros([len(A), len(B)])
    for row, a in enumerate(A):
        for col, b in enumerate(B):
            out[row, col] = dist(a, b)
    return out
def spectral():
    """Cluster a sampled subset of `credit` with spectral clustering,
    plot ground truth vs. predicted labels side by side, print binary
    precision/recall/F1, and return the predicted labels.
    """
    warnings.filterwarnings('ignore')
    vecs, truth = sample_vecs(credit, ratio=3)
    pairwise = dist(vecs, vecs)
    model = SpectralClustering(n_clusters=2, affinity='nearest_neighbors', random_state=0)
    labels = model.fit(pairwise).labels_

    _, axes = plt.subplots(1, 2)
    scatter(vecs[:, 0], vecs[:, 1], ax=axes[0], hue=truth)
    scatter(vecs[:, 0], vecs[:, 1], ax=axes[1], hue=labels)

    prec, rec, f1, _ = precision_recall_fscore_support(labels, truth, average="binary")
    print("precision:", prec)
    print("recall:", rec)
    print("f1:", f1)
    plt.show()
    return labels
def p_values():
    """Run five clustering algorithms on (differently sampled) subsets of
    `credit` and print a randomisation score for each against its ground
    truth. Each algorithm gets its own sample ratio.
    """
    # Sampling order kept exactly as before in case sample_vecs consumes
    # shared RNG state.
    vecs_km, gt_km = sample_vecs(credit, ratio=100)
    vecs_fcm, gt_fcm = sample_vecs(credit, ratio=7)
    vecs_gmm, gt_gmm = sample_vecs(credit, ratio=2)
    vecs_spec, gt_spec = sample_vecs(credit, ratio=3)
    vecs_db, gt_db = sample_vecs(credit, ratio=1)

    # K-means; flip labels so the positive class is the minority one.
    km_labels = KMeans(n_clusters=2, random_state=0).fit(vecs_km).labels_
    if sum(km_labels) > len(km_labels) / 2:
        km_labels = 1 - km_labels

    # Fuzzy c-means with a hard membership cutoff at 0.6.
    fcm = FCM(n_clusters=2, m=1.1).fit(vecs_fcm)
    fcm_labels = cutoff(fcm.u, 0.6)
    if sum(fcm_labels) > len(fcm_labels) / 2:
        fcm_labels = 1 - np.array(fcm_labels)

    gmm = GaussianMixture(n_components=2, random_state=0).fit(vecs_gmm)
    gmm_labels = gmm.predict(vecs_gmm)
    if sum(gmm_labels) > len(gmm_labels) / 2:
        gmm_labels = 1 - gmm_labels

    warnings.filterwarnings('ignore')
    spec_labels = SpectralClustering(
        n_clusters=2, affinity='nearest_neighbors',
        random_state=0).fit(dist(vecs_spec, vecs_spec)).labels_

    db_labels = dbscan_func(vecs_db, 11, 4.4)

    for name, labels, truth in [
        ("kmeans", km_labels, gt_km),
        ("fcm", fcm_labels, gt_fcm),
        ("gmm", gmm_labels, gt_gmm),
        ("spectral", spec_labels, gt_spec),
        ("dbscan", db_labels, gt_db),
    ]:
        print(name)
        print(randomize(labels, truth))
# Preview the raw generated points before clustering.
# NOTE(review): x1/y1 are assumed to be defined earlier in the file, with
# y1 used directly as the y-coordinate here -- confirm against that code.
plt.scatter(x1[:, 0], y1[:], marker='o')
plt.grid(axis='y', alpha=0.75)
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Distribution of the randomly generated data')

#%% Fitting the Hierarchical clustering using the complete linkage approach
# >> Complete linkage minimizes the maximum distance between all observations
from sklearn.cluster import AgglomerativeClustering
cplt_linkage_fit = AgglomerativeClustering(n_clusters=4, affinity="euclidean", linkage="complete")
cplt_linkage_fit.fit(x1, y1)

# Dendogram
from scipy.spatial import distance_matrix as dist
from scipy.cluster import hierarchy as h
dist_matrix = dist(x1, x1)
# BUG FIX: `method"complete"` was a SyntaxError -- the keyword argument
# requires `=`.
# NOTE(review): scipy's h.linkage treats a 2-D `y` as an observation matrix,
# not a square distance matrix; squareform(dist_matrix) may be intended --
# confirm.
z = h.linkage(y=dist_matrix, method="complete", metric="euclidean")
cplt_linkage_fit_dendogram = h.dendrogram(z)

# Plotting clusters
plt.figure(figsize=(6,4))
plt.title('Clustered Data Distribution - Complete Linkage Approach')
plt.grid(axis='y', alpha=0.75)
plt.grid(axis='x', alpha=0.75)
plt.xlabel('X')
plt.ylabel('Y')
# Min-max normalise the points to [0, 1] per dimension so subsequent text
# annotations land inside the axes. NOTE: x1 is overwritten in place, so any
# later code sees the rescaled coordinates.
x_min, x_max = np.min(x1, axis=0), np.max(x1, axis=0)
x1 = (x1 - x_min) / (x_max - x_min)

for i in range(x1.shape[0]):
    plt.text(x1[i, 0], x1[i, 1], str(y1[i]),