def action(self, tweets_list):
        """Vectorize the tweets' text and run Birch clustering on it.

        Builds a bag-of-words count matrix from the ``"text"`` entry of
        every tweet (French stop words excluded), passes the dense matrix
        to ``birch_algo`` and then terminates the interpreter.

        Args:
            tweets_list: Iterable of items indexable by ``"text"``; that
                entry is used as the document for the tweet.

        Raises:
            SystemExit: Always, once ``birch_algo`` has returned.
        """
        # Use the text as-is: the former encode("utf-8") / unicode(...)
        # round trip gave back the same text and depended on the
        # Python-2-only ``unicode`` builtin.
        corpus = [tweet["text"] for tweet in tweets_list]

        print(corpus)

        # A first, stop-word-free vectorization and the pairwise Euclidean
        # distance matrix used to be computed here, but neither result was
        # ever read, so only the vectorization that is actually used remains.
        stwf = stopwords.words('french')
        stwf.append('les')
        vectorizer = CountVectorizer(stop_words=stwf)
        X = vectorizer.fit_transform(corpus)

        # All the prints are grouped here.
        print("Results of Birch algorithm")

        birch_algo(X.toarray(), None)

        # NOTE(review): the method has always ended the process at this
        # point (previously through the interactive ``quit()`` helper).
        # This looks like a debugging stop -- confirm whether the clustering
        # result should be returned to the caller instead.
        raise SystemExit
    def action(self, tweets_list):
        """Vectorize the tweets' text and run Birch clustering on it.

        Builds a bag-of-words count matrix from the ``"text"`` entry of
        every tweet (French stop words excluded), passes the dense matrix
        to ``birch_algo`` and then terminates the interpreter.

        Args:
            tweets_list: Iterable of items indexable by ``"text"``; that
                entry is used as the document for the tweet.

        Raises:
            SystemExit: Always, once ``birch_algo`` has returned.
        """
        # Use the text as-is: the former encode("utf-8") / unicode(...)
        # round trip gave back the same text and depended on the
        # Python-2-only ``unicode`` builtin.
        corpus = [tweet["text"] for tweet in tweets_list]

        print(corpus)

        # A first, stop-word-free vectorization and the pairwise Euclidean
        # distance matrix used to be computed here, but neither result was
        # ever read, so only the vectorization that is actually used remains.
        stwf = stopwords.words('french')
        stwf.append('les')
        vectorizer = CountVectorizer(stop_words=stwf)
        X = vectorizer.fit_transform(corpus)

        # All the prints are grouped here.
        print("Results of Birch algorithm")

        birch_algo(X.toarray(), None)

        # NOTE(review): the method has always ended the process at this
        # point (previously through the interactive ``quit()`` helper).
        # This looks like a debugging stop -- confirm whether the clustering
        # result should be returned to the caller instead.
        raise SystemExit
# Example no. 3
# 0
# Expand the coordinate vectors into a grid, then pair the flattened grids
# column-wise so that every row of n_centres is one (x, y) blob centre.
xx, yy = np.meshgrid(xx, yy)
n_centres = np.column_stack((np.ravel(xx), np.ravel(yy)))

# Synthetic data set used to compare MiniBatchKMeans and Birch:
# 100000 samples generated around the grid of centres, with a fixed seed.
X, y = make_blobs(centers=n_centres, n_samples=100000, random_state=0)

# Figure that hosts the result panels.
fig = plt.figure(figsize=(12, 4))
fig.subplots_adjust(top=0.9, bottom=0.1, left=0.04, right=0.98)

# Endless iterator over all the colours matplotlib provides by default.
colors_ = cycle(colors.cnames.keys())

# Run Birch twice: first without the final clustering step
# (clustering=None), then with it (clustering=100).
labels1, centroids1, n_clusters1 = birch_algo(X, clustering=None)
labels2, centroids2, n_clusters2 = birch_algo(X, clustering=100)

# Pair up the per-run results so each can be looked up by run index.
labels = (labels1, labels2)
centroids = (centroids1, centroids2)
n_clusters = (n_clusters1, n_clusters2)

# Description of each run, in the same order as the tuples above.
final_step = [
    'without global clustering',
    'with global clustering',
]

# Plot the Birch results with and without the global clustering step.
for i in range(0, 2):
    ind = i + 1
    ax = fig.add_subplot(1, 3, ind +1)
    for this_centroids, k, col in zip(centroids[i], range(n_clusters[i]), colors_ ):
        mask = labels[i] == k