def compute_bench_2(chunks):
    """Benchmark MiniBatchKMeans on a fixed 2-D dataset for several chunk sizes.

    The dataset is made of eight Gaussian blobs (50000 points each, scaled
    by 0.8) centred on hard-coded means.  One ``MiniBatchKMeans`` model with
    ``k=8`` is fitted per entry of ``chunks``; progress, timing and inertia
    are printed as the benchmark runs.

    Parameters
    ----------
    chunks : sequence
        Values passed, one per fit, as ``chunk_size`` to ``MiniBatchKMeans``.
        Must support ``len()``.

    Returns
    -------
    results : defaultdict of list
        ``results['minibatchkmeans_speed']`` holds the elapsed time of each
        fit in seconds and ``results['minibatchkmeans_quality']`` the
        matching ``inertia_`` values, both in the order of ``chunks``.
    """
    results = defaultdict(list)
    samples_per_blob = 50000
    means = np.array([[1, 1], [-1, -1], [1, -1], [-1, 1],
                      [0.5, 0.5], [0.75, -0.5], [-1, 0.75], [1, 0]])
    # Draw every blob in order, then stack once; growing X inside the loop
    # would recopy the whole array on each pass.
    X = np.vstack([center + 0.8 * np.random.randn(samples_per_blob, 2)
                   for center in means])

    max_it = len(chunks)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    for it, chunk in enumerate(chunks, 1):
        print('==============================')
        print('Iteration %03d of %03d' % (it, max_it))
        print('==============================')
        print('')

        print('Fast K-Means')
        # The timed span deliberately covers estimator construction and fit.
        tstart = time()
        mbkmeans = MiniBatchKMeans(init='k-means++', k=8, chunk_size=chunk)
        mbkmeans.fit(X)
        delta = time() - tstart
        print("Speed: %0.3fs" % delta)
        # Inertia is not a duration, so it is printed without a unit suffix.
        print("Inertia: %0.3f" % mbkmeans.inertia_)
        print('')

        results['minibatchkmeans_speed'].append(delta)
        results['minibatchkmeans_quality'].append(mbkmeans.inertia_)

    return results
def compute_bench_2(chunks):
    """Benchmark MiniBatchKMeans on a fixed 2-D dataset for several chunk sizes.

    The dataset is made of eight Gaussian blobs (50000 points each, scaled
    by 0.8) centred on hard-coded means.  One ``MiniBatchKMeans`` model with
    ``k=8`` is fitted per entry of ``chunks``; progress, timing and inertia
    are printed as the benchmark runs.

    Parameters
    ----------
    chunks : sequence
        Values passed, one per fit, as ``chunk_size`` to ``MiniBatchKMeans``.
        Must support ``len()``.

    Returns
    -------
    results : defaultdict of list
        ``results["minibatchkmeans_speed"]`` holds the elapsed time of each
        fit in seconds and ``results["minibatchkmeans_quality"]`` the
        matching ``inertia_`` values, both in the order of ``chunks``.
    """
    results = defaultdict(list)
    samples_per_blob = 50000
    means = np.array([[1, 1], [-1, -1], [1, -1], [-1, 1],
                      [0.5, 0.5], [0.75, -0.5], [-1, 0.75], [1, 0]])
    # Draw every blob in order, then stack once; growing X inside the loop
    # would recopy the whole array on each pass.
    X = np.vstack([center + 0.8 * np.random.randn(samples_per_blob, 2)
                   for center in means])

    max_it = len(chunks)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    for it, chunk in enumerate(chunks, 1):
        print("==============================")
        print("Iteration %03d of %03d" % (it, max_it))
        print("==============================")
        print("")

        print("Fast K-Means")
        # The timed span deliberately covers estimator construction and fit.
        tstart = time()
        mbkmeans = MiniBatchKMeans(init="k-means++", k=8, chunk_size=chunk)
        mbkmeans.fit(X)
        delta = time() - tstart
        print("Speed: %0.3fs" % delta)
        # Inertia is not a duration, so it is printed without a unit suffix.
        print("Inertia: %0.3f" % mbkmeans.inertia_)
        print("")

        results["minibatchkmeans_speed"].append(delta)
        results["minibatchkmeans_quality"].append(mbkmeans.inertia_)

    return results
def compute_bench(samples_range, features_range):
    """Benchmark KMeans against MiniBatchKMeans on random integer data.

    For every ``(n_samples, n_features)`` combination a fresh array of
    integers drawn from the closed range [-50, 50] is generated, then a
    ``KMeans`` and a ``MiniBatchKMeans`` model (both ``k=10``,
    ``init='k-means++'``) are fitted on it.  Progress, timing and inertia
    are printed as the benchmark runs.

    Parameters
    ----------
    samples_range : sequence of int
        Numbers of rows to benchmark.  Must support ``len()``.
    features_range : sequence of int
        Numbers of columns to benchmark.  Must support ``len()``.

    Returns
    -------
    results : defaultdict of list
        Keys ``'kmeans_speed'``, ``'kmeans_quality'``,
        ``'minibatchkmeans_speed'`` and ``'minibatchkmeans_quality'``; each
        list has one entry per combination, ordered with ``samples_range``
        as the outer loop.  Speeds are elapsed seconds, qualities are the
        fitted ``inertia_`` values.
    """
    results = defaultdict(list)
    chunk = 100
    max_it = len(samples_range) * len(features_range)

    it = 0
    # Single-argument print(...) behaves identically on Python 2 and 3.
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('==============================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('==============================')
            print('')
            # randint excludes its upper bound, so 51 keeps the closed
            # range [-50, 50] that the deprecated random_integers produced.
            data = nr.randint(-50, 51, (n_samples, n_features))

            print('K-Means')
            # This timed span covers estimator construction and fit.
            tstart = time()
            kmeans = KMeans(init='k-means++', k=10).fit(data)
            delta = time() - tstart
            print("Speed: %0.3fs" % delta)
            print("Inertia: %0.5f" % kmeans.inertia_)
            print('')

            results['kmeans_speed'].append(delta)
            results['kmeans_quality'].append(kmeans.inertia_)

            print('Fast K-Means')
            # The estimator is built before the timer starts, so only the
            # fit call is measured here (unlike the KMeans span above).
            mbkmeans = MiniBatchKMeans(init='k-means++',
                                       k=10,
                                       chunk_size=chunk)
            tstart = time()
            mbkmeans.fit(data)
            delta = time() - tstart
            print("Speed: %0.3fs" % delta)
            print("Inertia: %f" % mbkmeans.inertia_)
            print('')
            print('')

            results['minibatchkmeans_speed'].append(delta)
            results['minibatchkmeans_quality'].append(mbkmeans.inertia_)

    return results
def compute_bench(samples_range, features_range):
    """Benchmark KMeans against MiniBatchKMeans on random integer data.

    For every ``(n_samples, n_features)`` combination a fresh array of
    integers drawn from the closed range [-50, 50] is generated, then a
    ``KMeans`` and a ``MiniBatchKMeans`` model (both ``k=10``,
    ``init='k-means++'``) are fitted on it.  Progress, timing and inertia
    are printed as the benchmark runs.

    Parameters
    ----------
    samples_range : sequence of int
        Numbers of rows to benchmark.  Must support ``len()``.
    features_range : sequence of int
        Numbers of columns to benchmark.  Must support ``len()``.

    Returns
    -------
    results : defaultdict of list
        Keys ``'kmeans_speed'``, ``'kmeans_quality'``,
        ``'minibatchkmeans_speed'`` and ``'minibatchkmeans_quality'``; each
        list has one entry per combination, ordered with ``samples_range``
        as the outer loop.  Speeds are elapsed seconds, qualities are the
        fitted ``inertia_`` values.
    """
    results = defaultdict(list)
    chunk = 100
    max_it = len(samples_range) * len(features_range)

    it = 0
    # Single-argument print(...) behaves identically on Python 2 and 3.
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('==============================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('==============================')
            print('')
            # randint excludes its upper bound, so 51 keeps the closed
            # range [-50, 50] that the deprecated random_integers produced.
            data = nr.randint(-50, 51, (n_samples, n_features))

            print('K-Means')
            # This timed span covers estimator construction and fit.
            tstart = time()
            kmeans = KMeans(init='k-means++', k=10).fit(data)
            delta = time() - tstart
            print("Speed: %0.3fs" % delta)
            print("Inertia: %0.5f" % kmeans.inertia_)
            print('')

            results['kmeans_speed'].append(delta)
            results['kmeans_quality'].append(kmeans.inertia_)

            print('Fast K-Means')
            # The estimator is built before the timer starts, so only the
            # fit call is measured here (unlike the KMeans span above).
            mbkmeans = MiniBatchKMeans(init='k-means++',
                                       k=10,
                                       chunk_size=chunk)
            tstart = time()
            mbkmeans.fit(data)
            delta = time() - tstart
            print("Speed: %0.3fs" % delta)
            print("Inertia: %f" % mbkmeans.inertia_)
            print('')
            print('')

            results['minibatchkmeans_speed'].append(delta)
            results['minibatchkmeans_quality'].append(mbkmeans.inertia_)

    return results