コード例 #1
0
def doSom():
    """Cluster users with ``somcluster`` and plot the result as a Voronoi diagram.

    Loads ``users``, ``labels`` and ``screen_names`` from the
    ``TwitterInsert`` database through ``pymongo_utill.byTimeFreq``
    (``sample=10``), runs ``somcluster`` on ``users`` with ``nxgrid=21``,
    ``nygrid=31`` and ``niter=500``, builds a ``Voronoi`` diagram from the
    returned ``clusterid`` values, fills every region that does not contain
    the ``-1`` index, and finally calls ``plt.show()``.

    Takes no arguments and returns ``None``.
    """
    conn = pymongo_utill.getConnectionToMongoDB()
    try:
        db = conn['TwitterInsert']
        # users, labels, screen_names = pymongo_utill.byTimeFreq(db=db, sample=225)
        users, labels, screen_names = pymongo_utill.byTimeFreq(db=db, sample=10)
    finally:
        # Release the connection even when the query above raises.
        # NOTE(review): PyMongo 3.0 removed ``disconnect()`` in favour of
        # ``close()`` -- confirm which client type getConnectionToMongoDB returns.
        conn.disconnect()

    # vectorizer = WordVectorizer()
    # users = vectorizer.fit_transform(users)

    clusterid, celldata = somcluster(data=users, nxgrid=21, nygrid=31, niter=500)

    # NOTE(review): these limits are set before voronoi_plot_2d() runs; if that
    # helper draws on a figure of its own they will not apply to the Voronoi
    # plot -- confirm the intended figure.
    plt.xlim((-5, 25))
    plt.ylim((-5, 35))

    # Disabled debugging aids, kept for reference:
    # print(len(clusterid))
    # for i, v in enumerate(clusterid):
    #     print("number:%s coordinates:%s name:%s class:%s" % (i, v, screen_names[i], labels[i]))
    #
    # for i, (x, y) in enumerate(clusterid):
    #     if labels[i] == 0:
    #         plt.plot(x, y, '-bo')
    #     if labels[i] == 1:
    #         plt.plot(x, y, '-ro')
    #
    # plt.show()
    #
    # for i, v in enumerate(clusterid):
    #     plt.annotate(xy=v, s=int(i/7))

    vor = Voronoi(clusterid)
    voronoi_plot_2d(vor)

    for region in vor.regions:
        # Only fill regions that have vertices and do not reference index -1
        # (scipy uses -1 for a vertex outside the diagram, i.e. an open region).
        if region and -1 not in region:
            polygon = [vor.vertices[i] for i in region]
            plt.fill(*zip(*polygon))

    plt.show()
コード例 #2
0
    optimalK = 0
    for k in range(init_k, last_k):
        km = KMeans(n_clusters=k, init='k-means++', n_init=10).fit(X)
        label_predic = km.labels_
        score = metrics.silhouette_score(X, label_predic, metric='cosine')
        if highest_score < score:
            highest_score = score
            optimalK = k
    return optimalK


if __name__ == '__main__':

    conn = pymongo_utill.getConnectionToMongoDB()
    db = conn['TwitterInsert2']
    feature_vecs, labels, screen_names = pymongo_utill.byTimeFreq(db=db, sample=100)
    print(len(feature_vecs))
    #screen_names, labels = pymongo_utill.loadUsers(db, sample=1254)
    """
    where = []
    for threshold in [0,1]:
        where.append(np.argwhere(labels == threshold))
    centers, est, est_v = KmeansForAgeEst2(db, where, screen_names, 2)
    num_fig = 0
    """
    """
    for i, center in enumerate(centers):

        for ctr in center:
            ti = range(24)
            plt.figure(num_fig)