def cluster_kmeans(n_samples, batch_size=10):
    """Incrementally fit a two-cluster MiniBatchKMeans model, batch by batch.

    Consecutive index ranges covering ``0 .. n_samples`` are requested from
    ``extract_data``. Each returned batch is standardised column-wise using
    that batch's own mean and standard deviation, then passed to
    ``partial_fit``.

    Args:
        n_samples: Upper bound of the indices requested from
            ``extract_data``. No batch is requested past this bound, and
            nothing is fitted when it is <= 0.
        batch_size: Width of each requested index range (default 10, the
            value that used to be hard-coded).

    Returns:
        The ``MiniBatchKMeans`` estimator after all partial fits.

    Raises:
        ValueError: If ``batch_size`` is not a positive number.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got %r" % (batch_size,))

    rng = np.random.RandomState(9)
    kmeans = MiniBatchKMeans(n_clusters=2, random_state=rng, verbose=True,
                             compute_labels=True)

    for start in range(0, n_samples, batch_size):
        # A single pre-built string prints identically under Python 2 and 3
        # (the doubled space reproduces the original comma-separated output).
        print("k_means partial fitting, i =  " + str(start))

        # Clamp the final range so it never reaches past n_samples.
        # NOTE(review): assumes `end` is an exclusive bound - confirm against
        # extract_data.
        end = min(start + batch_size, n_samples)
        data = np.asarray(extract_data(start=start, end=end))

        # Out-of-place arithmetic: in-place `-=` / `/=` raise a casting error
        # when the extracted array has an integer dtype.
        centered = data - np.mean(data, axis=0)
        scale = np.std(centered, axis=0)
        # Constant columns have zero spread; dividing by 1 keeps them at 0
        # instead of producing NaN/inf, which partial_fit would reject.
        scale = np.where(scale == 0, 1.0, scale)

        # NOTE(review): statistics are per batch, so batches are not on a
        # common scale - confirm this is intended.
        kmeans.partial_fit(centered / scale)

    print("fitting of one-third data finished.")
    return kmeans
def cluster_kmeans(n_samples, batch_size=10):
    """Incrementally fit a two-cluster MiniBatchKMeans model, batch by batch.

    Consecutive index ranges covering ``0 .. n_samples`` are requested from
    ``extract_data``. Each returned batch is standardised column-wise using
    that batch's own mean and standard deviation, then passed to
    ``partial_fit``.

    Args:
        n_samples: Upper bound of the indices requested from
            ``extract_data``. No batch is requested past this bound, and
            nothing is fitted when it is <= 0.
        batch_size: Width of each requested index range (default 10, the
            value that used to be hard-coded).

    Returns:
        The ``MiniBatchKMeans`` estimator after all partial fits.

    Raises:
        ValueError: If ``batch_size`` is not a positive number.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got %r" % (batch_size,))

    rng = np.random.RandomState(9)
    kmeans = MiniBatchKMeans(n_clusters=2, random_state=rng, verbose=True,
                             compute_labels=True)

    for start in range(0, n_samples, batch_size):
        # A single pre-built string prints identically under Python 2 and 3
        # (the doubled space reproduces the original comma-separated output).
        print("k_means partial fitting, i =  " + str(start))

        # Clamp the final range so it never reaches past n_samples.
        # NOTE(review): assumes `end` is an exclusive bound - confirm against
        # extract_data.
        end = min(start + batch_size, n_samples)
        data = np.asarray(extract_data(start=start, end=end))

        # Out-of-place arithmetic: in-place `-=` / `/=` raise a casting error
        # when the extracted array has an integer dtype.
        centered = data - np.mean(data, axis=0)
        scale = np.std(centered, axis=0)
        # Constant columns have zero spread; dividing by 1 keeps them at 0
        # instead of producing NaN/inf, which partial_fit would reject.
        scale = np.where(scale == 0, 1.0, scale)

        # NOTE(review): statistics are per batch, so batches are not on a
        # common scale - confirm this is intended.
        kmeans.partial_fit(centered / scale)

    print("fitting of one-third data finished.")
    return kmeans