def dbscan_tests(features, ftype='mean', eps=0.7, min_samples=2):
    """Cluster ``features`` with DBSCAN on a precomputed facedist distance.

    The distance selected by ``ftype`` is obtained from ``facedist``,
    indexed here as a flat array of pairwise values, expanded into a
    symmetric ``(n, n)`` matrix and passed to
    ``DBSCAN(metric='precomputed')``.

    Args:
        features: Input handed to the ``facedist`` distance function;
            ``len(features)`` fixes the size of the dense matrix.
        ftype: ``'min'``, ``'max'`` or ``'meanmin'`` select the matching
            ``facedist.*_dist`` function; any other value uses
            ``facedist.mean_dist``.
        eps: Forwarded to ``DBSCAN``.
        min_samples: Forwarded to ``DBSCAN``.

    Returns:
        A 3-tuple ``(label_counts, labels, None)``: ``label_counts`` is a
        two-column array of ``(label, count)`` rows built from
        ``np.unique``, ``labels`` is ``db.labels_``, and the last element
        is always ``None``.

    Raises:
        IndexError: If the distance array holds fewer than
            ``n * (n - 1) // 2`` entries.
    """
    if ftype == 'min':
        D = facedist.min_dist(features)
    elif ftype == 'max':
        D = facedist.max_dist(features)
    elif ftype == 'meanmin':
        D = facedist.meanmin_dist(features)
    else:
        D = facedist.mean_dist(features)
    print(D.shape)

    nrow = len(features)
    n_pairs = nrow * (nrow - 1) // 2
    D = np.asarray(D)
    if D.shape[0] < n_pairs:
        raise IndexError(
            'distance array has %d entries, need %d' % (D.shape[0], n_pairs))

    dense_distances = np.zeros((nrow, nrow), dtype=np.double)
    # The pair (ii, jj) with ii < jj is stored at D[ii + jj * (jj - 1) // 2],
    # so the jj entries belonging to column jj are contiguous.  Copy them one
    # column at a time instead of one element at a time.  Floor division
    # keeps the offset an int even when "/" means true division.
    for col in range(1, nrow):
        offset = col * (col - 1) // 2
        segment = D[offset:offset + col]
        dense_distances[:col, col] = segment
        dense_distances[col, :col] = segment
    # Drop the flat array before clustering.
    del D

    db = DBSCAN(eps=eps, min_samples=min_samples,
                metric='precomputed').fit(dense_distances)
    labels = db.labels_
    unique, counts = np.unique(labels, return_counts=True)
    return np.asarray((unique, counts)).T, labels, None
def hdbscan_tests(features, ftype='mean', min_cluster_size=2):
    """Cluster ``features`` with HDBSCAN on a precomputed facedist distance.

    The distance selected by ``ftype`` is obtained from ``facedist``,
    indexed here as a flat array of pairwise values, expanded into a
    symmetric ``(n, n)`` matrix and passed to
    ``hdbscan.HDBSCAN(metric='precomputed')``.

    Args:
        features: Input handed to the ``facedist`` distance function;
            ``len(features)`` fixes the size of the dense matrix.
        ftype: ``'min'``, ``'max'`` or ``'meanmin'`` select the matching
            ``facedist.*_dist`` function; any other value uses
            ``facedist.mean_dist``.
        min_cluster_size: Forwarded to ``hdbscan.HDBSCAN``.

    Returns:
        A 4-tuple ``(label_counts, labels, probabilities, pers)``:
        ``label_counts`` is a two-column array of ``(label, count)`` rows
        built from ``np.unique``; the remaining items are the fitted
        model's ``labels_``, ``probabilities_`` and
        ``cluster_persistence_`` attributes.

    Raises:
        IndexError: If the distance array holds fewer than
            ``n * (n - 1) // 2`` entries.
    """
    if ftype == 'min':
        D = facedist.min_dist(features)
    elif ftype == 'max':
        D = facedist.max_dist(features)
    elif ftype == 'meanmin':
        D = facedist.meanmin_dist(features)
    else:
        D = facedist.mean_dist(features)
    print(D.shape)

    nrow = len(features)
    n_pairs = nrow * (nrow - 1) // 2
    D = np.asarray(D)
    if D.shape[0] < n_pairs:
        raise IndexError(
            'distance array has %d entries, need %d' % (D.shape[0], n_pairs))

    dense_distances = np.zeros((nrow, nrow), dtype=np.double)
    # The pair (ii, jj) with ii < jj is stored at D[ii + jj * (jj - 1) // 2],
    # so the jj entries belonging to column jj are contiguous.  Copy them one
    # column at a time instead of one element at a time.  Floor division
    # keeps the offset an int even when "/" means true division.
    for col in range(1, nrow):
        offset = col * (col - 1) // 2
        segment = D[offset:offset + col]
        dense_distances[:col, col] = segment
        dense_distances[col, :col] = segment
    # Drop the flat array before clustering.
    del D

    db = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size,
                         metric='precomputed').fit(dense_distances)
    labels = db.labels_
    probabilities = db.probabilities_
    pers = db.cluster_persistence_
    unique, counts = np.unique(labels, return_counts=True)
    return np.asarray((unique, counts)).T, labels, probabilities, pers
def dbscan_tests(features, ftype='mean', eps=0.7, min_samples=2):
    """Run DBSCAN on a facedist distance computed from ``features``.

    Args:
        features: Input handed to the selected ``facedist`` function.
        ftype: ``'min'``, ``'max'`` or ``'meanmin'`` pick the matching
            ``facedist.*_dist`` function; anything else uses
            ``facedist.mean_dist``.
        eps: Forwarded to ``DBSCAN``.
        min_samples: Forwarded to ``DBSCAN``.

    Returns:
        ``(label_counts, labels, None)`` where ``label_counts`` is a
        two-column array of ``(label, count)`` rows and ``labels`` is the
        fitted model's ``labels_``.
    """
    # Table of recognised ftype values; the default covers everything else.
    dist_name = 'mean_dist'
    for key, candidate in (('min', 'min_dist'),
                           ('max', 'max_dist'),
                           ('meanmin', 'meanmin_dist')):
        if ftype == key:
            dist_name = candidate
            break
    distances = getattr(facedist, dist_name)(features)

    # The facedist result is passed to DBSCAN as-is as the precomputed metric.
    clusterer = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed')
    fitted = clusterer.fit(distances)
    labels = fitted.labels_

    values, occurrences = np.unique(labels, return_counts=True)
    label_counts = np.asarray((values, occurrences)).T
    return label_counts, labels, None
def agglo_tests(features, ftype='mean', num_cluster=8, metric='precomputed'):
    """Run average-linkage agglomerative clustering on a facedist distance.

    Args:
        features: Input handed to the selected ``facedist`` function.
        ftype: ``'min'``, ``'max'`` or ``'meanmin'`` pick the matching
            ``facedist.*_dist`` function; anything else uses
            ``facedist.mean_dist``.
        num_cluster: Passed to ``AgglomerativeClustering`` as
            ``n_clusters``.
        metric: Passed to ``AgglomerativeClustering`` as ``affinity``.

    Returns:
        ``(label_counts, labels, None)`` where ``label_counts`` is a
        two-column array of ``(label, count)`` rows and ``labels`` is the
        fitted model's ``labels_``.
    """
    # Table of recognised ftype values; the default covers everything else.
    dist_name = 'mean_dist'
    for key, candidate in (('min', 'min_dist'),
                           ('max', 'max_dist'),
                           ('meanmin', 'meanmin_dist')):
        if ftype == key:
            dist_name = candidate
            break
    distances = getattr(facedist, dist_name)(features)

    model = AgglomerativeClustering(
        n_clusters=num_cluster,
        affinity=metric,
        linkage='average',
    )
    model.fit(distances)
    labels = model.labels_

    values, occurrences = np.unique(labels, return_counts=True)
    label_counts = np.asarray((values, occurrences)).T
    return label_counts, labels, None
def hdbscan_tests(features, ftype='mean', min_cluster_size=2):
    """Run HDBSCAN on a facedist distance computed from ``features``.

    Args:
        features: Input handed to the selected ``facedist`` function.
        ftype: ``'min'``, ``'max'`` or ``'meanmin'`` pick the matching
            ``facedist.*_dist`` function; anything else uses
            ``facedist.mean_dist``.
        min_cluster_size: Forwarded to ``hdbscan.HDBSCAN``.

    Returns:
        ``(label_counts, labels, probabilities, pers)`` where
        ``label_counts`` is a two-column array of ``(label, count)`` rows
        and the other items are the fitted model's ``labels_``,
        ``probabilities_`` and ``cluster_persistence_`` attributes.
    """
    # Table of recognised ftype values; the default covers everything else.
    dist_name = 'mean_dist'
    for key, candidate in (('min', 'min_dist'),
                           ('max', 'max_dist'),
                           ('meanmin', 'meanmin_dist')):
        if ftype == key:
            dist_name = candidate
            break
    distances = getattr(facedist, dist_name)(features)

    # The facedist result is passed to HDBSCAN as-is as the precomputed metric.
    clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size,
                                metric='precomputed')
    fitted = clusterer.fit(distances)

    labels = fitted.labels_
    probabilities = fitted.probabilities_
    pers = fitted.cluster_persistence_

    values, occurrences = np.unique(labels, return_counts=True)
    label_counts = np.asarray((values, occurrences)).T
    return label_counts, labels, probabilities, pers
#import pyximport
#pyximport.install()
import fastdist
import numpy as np
import timeit
import facedist

# Call facedist.mean_dist once on an all-zero (90000, 100) input; the result
# is discarded.
FA = np.zeros((90000, 100))
facedist.mean_dist(FA)

A = np.random.randn(100, 200)

# F has shape (1000, 100, 1700) and every innermost row is the same
# np.linspace(0.1, 1.5, 1700).  Tiling that single row yields the same array
# without 100,000 linspace calls, without an intermediate list of 1000
# arrays, and without an inner comprehension that reuses the outer loop
# variable.
F = np.tile(np.linspace(0.1, 1.5, 1700), (1000, 100, 1))
print(F.shape)

# Pairwise Euclidean distances between the rows of A, computed by
# broadcasting, used as the reference for fastdist.dist.
D1 = np.sqrt(np.square(A[np.newaxis, :, :] - A[:, np.newaxis, :]).sum(2))
D2 = fastdist.dist(A)
D3 = facedist.dist(F)
print(np.allclose(D1, D2))  # expected output: True
#import pyximport
#pyximport.install()
import numpy as np
import timeit
import facedist

A = np.random.randn(100, 200)

# F has shape (1000, 100, 1700) and every innermost row is the same
# np.linspace(0.1, 1.5, 1700).  Tiling that single row yields the same array
# without 100,000 linspace calls, without an intermediate list of 1000
# arrays, and without an inner comprehension that reuses the outer loop
# variable.
F = np.tile(np.linspace(0.1, 1.5, 1700), (1000, 100, 1))
print(F.shape)

# Pairwise Euclidean distances between the rows of A, computed by
# broadcasting.
D1 = np.sqrt(np.square(A[np.newaxis, :, :] - A[:, np.newaxis, :]).sum(2))
D3 = facedist.mean_dist(F)

# The script used to end with `print np.allclose(D1, D2)`, but D2 is never
# assigned here, so that line could only raise NameError.  It has been
# removed.  NOTE(review): presumably D2 was a second distance implementation
# to validate against D1 -- restore the check once D2 is computed again.

#print timeit.timeit('np.sqrt(np.square(A[np.newaxis,:,:]-A[:,np.newaxis,:]).sum(2))', number=100, setup='import numpy as np; A = np.random.randn(100, 200)')
import timeit
import facedist

A = np.random.randn(100, 200)

# F has shape (1000, 100, 1700) and every innermost row is the same
# np.linspace(0.1, 1.5, 1700).  Tiling that single row yields the same array
# without 100,000 linspace calls, without an intermediate list of 1000
# arrays, and without an inner comprehension that reuses the outer loop
# variable.
F = np.tile(np.linspace(0.1, 1.5, 1700), (1000, 100, 1))
print(F.shape)

# Pairwise Euclidean distances between the rows of A, computed by
# broadcasting.
D1 = np.sqrt(np.square(A[np.newaxis, :, :] - A[:, np.newaxis, :]).sum(2))
D3 = facedist.mean_dist(F)

# The script used to end with `print np.allclose(D1, D2)`, but D2 is not
# assigned anywhere in the visible statements, so that line would raise
# NameError.  It has been removed.  NOTE(review): if D2 is defined earlier in
# the file, or a second distance implementation is added, restore the check.

#print timeit.timeit('np.sqrt(np.square(A[np.newaxis,:,:]-A[:,np.newaxis,:]).sum(2))', number=100, setup='import numpy as np; A = np.random.randn(100, 200)')
import networkx as nx

fileDir = os.path.dirname(os.path.realpath(__file__))

# Load features array from disk
features = np.load(os.path.join(fileDir, 'features_3MONTH_15.npy'))
print('Loaded feature: %s' % (features.shape,))

# Work on the first 1000 entries only.
test = features[:1000]
print(test.shape)

start = time.time()
D = facedist.mean_dist(test)
print('D64 D: %s' % (D.shape,))
print('D64 sys-size: %s' % (sys.getsizeof(D),))
print('D64 np nbytes: %s' % (D.nbytes,))

# Expand the flat distance array into a symmetric (nrow, nrow) matrix.
# The pair (ii, jj) with ii < jj is stored at D[ii + jj * (jj - 1) // 2], so
# the entries for one column are contiguous and can be copied as a slice
# instead of element by element.  Floor division keeps the offset an int even
# when "/" means true division.
nrow = len(test)
fdense_distances = np.zeros((nrow, nrow), dtype=np.double)
for col in range(1, nrow):
    offset = col * (col - 1) // 2
    segment = D[offset:offset + col]
    fdense_distances[:col, col] = segment
    fdense_distances[col, :col] = segment
#import pyximport
#pyximport.install()
import fastdist
import numpy as np
import timeit
import facedist

# Call facedist.mean_dist once on an all-zero (90000, 100) input; the result
# is discarded.
FA = np.zeros((90000, 100))
facedist.mean_dist(FA)

A = np.random.randn(100, 200)

# F has shape (1000, 100, 1700) and every innermost row is the same
# np.linspace(0.1, 1.5, 1700).  Tiling that single row yields the same array
# without 100,000 linspace calls, without an intermediate list of 1000
# arrays, and without an inner comprehension that reuses the outer loop
# variable.
F = np.tile(np.linspace(0.1, 1.5, 1700), (1000, 100, 1))
print(F.shape)
fileDir = os.path.dirname(os.path.realpath(__file__))

# Load features array from disk
features = np.load(os.path.join(fileDir, 'features_30sec_fixed.npy'))
print('Loaded feature: %s' % (features.shape,))

# Work on the first 10000 entries only.
test = features[:10000]
print(test.shape)

start = time.time()
D = facedist.mean_dist(test)
print('D64 sys-size: %s' % (sys.getsizeof(D),))
print('D64 np nbytes: %s' % (D.nbytes,))

# Expand the flat distance array into a symmetric (nrow, nrow) matrix.
# The pair (ii, jj) with ii < jj is stored at D[ii + jj * (jj - 1) // 2], so
# the entries for one column are contiguous and can be copied as a slice
# instead of element by element (about 50 million Python-level iterations at
# nrow = 10000).  Floor division keeps the offset an int even when "/" means
# true division.
nrow = len(test)
dense_distances = np.zeros((nrow, nrow), dtype=np.double)
for col in range(1, nrow):
    offset = col * (col - 1) // 2
    segment = D[offset:offset + col]
    dense_distances[:col, col] = segment
    dense_distances[col, :col] = segment