def doSom(sample=10):
    """Cluster user feature vectors on a grid and plot a filled Voronoi diagram.

    Loads feature vectors from the ``TwitterInsert`` database through
    ``pymongo_utill.byTimeFreq``, runs ``somcluster`` on a 21 x 31 grid for
    500 iterations, builds a Voronoi diagram from the returned per-item grid
    coordinates, fills every bounded region and shows the figure.

    Args:
        sample: Forwarded unchanged to ``pymongo_utill.byTimeFreq`` as its
            ``sample`` argument (presumably the number of users to load --
            confirm against that helper). Defaults to 10, the value that was
            previously hard-coded.
    """
    conn = pymongo_utill.getConnectionToMongoDB()
    try:
        db = conn['TwitterInsert']
        users, _labels, _screen_names = pymongo_utill.byTimeFreq(
            db=db, sample=sample)
    finally:
        # Release the connection even when loading fails.
        # NOTE(review): PyMongo 3.x removed ``disconnect()`` in favour of
        # ``close()``; confirm which client type the helper returns.
        conn.disconnect()

    clusterid, _celldata = somcluster(
        data=users, nxgrid=21, nygrid=31, niter=500)

    # NOTE(review): voronoi_plot_2d opens its own figure when no ``ax`` is
    # passed, so these limits may end up on a separate, empty figure --
    # confirm this is intended.
    plt.xlim((-5, 25))
    plt.ylim((-5, 35))

    vor = Voronoi(clusterid)
    voronoi_plot_2d(vor)
    for region in vor.regions:
        # Only bounded regions can be filled: skip empty entries and regions
        # that reference vertex index -1.
        if region and -1 not in region:
            polygon = [vor.vertices[i] for i in region]
            plt.fill(*zip(*polygon))
    plt.show()
optimalK = 0 for k in range(init_k, last_k): km = KMeans(n_clusters=k, init='k-means++', n_init=10).fit(X) label_predic = km.labels_ score = metrics.silhouette_score(X, label_predic, metric='cosine') if highest_score < score: highest_score = score optimalK = k return optimalK if __name__ == '__main__': conn = pymongo_utill.getConnectionToMongoDB() db = conn['TwitterInsert2'] feature_vecs, labels, screen_names = pymongo_utill.byTimeFreq(db=db, sample=100) print(len(feature_vecs)) #screen_names, labels = pymongo_utill.loadUsers(db, sample=1254) """ where = [] for threshold in [0,1]: where.append(np.argwhere(labels == threshold)) centers, est, est_v = KmeansForAgeEst2(db, where, screen_names, 2) num_fig = 0 """ """ for i, center in enumerate(centers): for ctr in center: ti = range(24) plt.figure(num_fig)