Code Example #1
File: server.py Project: yjlo123/SongRecommend
def index():
    token = request.args.get('token', '')

    twitter = request.args.get('twitter', '')
    if twitter:
        t = tweets.Tweets()
        color = t.user_to_color(twitter)
    else:
        color = None

    if token:
        sp = spotipy.Spotify(auth=token)
        results = sp.current_user_saved_tracks()
        profile = sp.current_user()
        base = songBase.Songbase()
        country = profile['country']
        age = calculate_age(profile['birthdate'])
        song = base.get_song(color, country, age, [])
    else:
        results = None
        profile = None
        song = None

    return render_template('index.html',
                           token=token,
                           results=results,
                           profile=profile,
                           twitter=twitter,
                           color=color,
                           colorcode=color_to_code(color),
                           song=song)
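Example #1 leans on two helper functions that are not part of this listing: calculate_age and color_to_code. As a rough illustration only (not the SongRecommend implementation), calculate_age could be written as below, assuming Spotify's profile "birthdate" field is an ISO "YYYY-MM-DD" string:

from datetime import date, datetime

def calculate_age(birthdate):
    # parse the assumed "YYYY-MM-DD" birthdate string
    born = datetime.strptime(birthdate, "%Y-%m-%d").date()
    today = date.today()
    # subtract one year if this year's birthday has not happened yet
    return today.year - born.year - ((today.month, today.day) < (born.month, born.day))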
Code Example #2
File: tyrs.py Project: Nic0/tyrs
def init_api():
    api = tweets.Tweets()
    container.add('api', api)
    try:
        api.authentication()
    except URLError as e:
        print('error:%s' % e)
        sys.exit(1)
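Because this is a single function extracted from tyrs.py, its imports are not shown. To run it standalone it needs at least the following; the URLError location shown is Python 3's urllib.error (the original Python 2 code would use urllib2), and the import paths for tyrs's own container and tweets modules are assumptions:

import sys
from urllib.error import URLError  # urllib2.URLError in the original Python 2 code

import container  # tyrs's shared object registry (path assumed)
import tweets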
Code Example #3
def authenticate(self):
    tyrs.init_conf()
    self.api = tweets.Tweets()
    self.api.authentication()
Code Example #4
def main():
    E = float(sys.argv[2])
    news_api = news.News()
    word_processor = processor.Processor()
    tweets_api = tweets.Tweets(int(sys.argv[4]))

    articles = news_api.process_news(news_api.retrieve_everything())

    data = []
    for line in open(sys.argv[1]):
        data.append(json.loads(line))

    all_tweets = tweets_api.process_tweets(data)

    all_tokens = []
    copied_tweets = list(all_tweets)

    for tweet in copied_tweets:
        tokens = word_processor.tweet_tokenize(tweet[0])
        if tokens["text"] == []:
            all_tweets.remove(tweet)
            continue
        all_tokens.append(tokens)

    all_clusters = []
    # we compute the similarity of each tweet against all existing
    # clusters, not against the other tweets
    cluster_id = 0
    for i in range(len(all_tweets)):
        tweet = all_tweets[i]
        token = all_tokens[i]
        # first cluster
        if all_clusters == []:
            new_cluster = cluster.Cluster(tweet[0], tweet[1], token,
                                          cluster_id)
            cluster_id += 1
            all_clusters.append(new_cluster)
            continue

        clustered = False
        # max_cluster_similarity = 0
        # max_cluster_index = -1

        # for single_cluster in all_clusters:
        for j in range(len(all_clusters)):
            single_cluster = all_clusters[j]
            vector = single_cluster.get_vector(False)
            # no common words between the tweet and the cluster, skip
            common_text_vector = intersection(vector["text"], token["text"])
            common_hashtag_vector = intersection(vector["hashtag"],
                                                 token["hashtag"])
            common_url_vector = intersection(vector["url"], token["url"])
            if common_text_vector == [] and \
                common_hashtag_vector == [] and \
                common_url_vector == []:
                continue

            vector = single_cluster.get_vector(True)

            new_token = {}
            new_token["text"] = " ".join(token["text"])
            new_token["hashtag"] = token["hashtag"]
            new_token["url"] = token["url"]

            similarity = word_processor.new_triple_similarity(
                new_token, vector)
            # print("Tweet %d, Cluster %d" % (i, j))
            # print("Similarity before: %f" % (similarity))
            # # similarity = word_processor.docs_similarity(tweet[0], vector)
            # similarity = word_processor.modified_similarity(
            #         similarity, tweet[1], single_cluster)
            # print("Similarity after: %f" % (similarity))
            # if similarity >= E and similarity > max_cluster_similarity:
            if similarity >= E:
                # max_cluster_similarity = similarity
                # max_cluster_index = j
                single_cluster.push(tweet[0], tweet[1], token)
                clustered = True
                # TODO: we need to consider when one tweet is similar to multiple clusters,
                # which cluster should we push to
                break

        # if max_cluster_index != -1:
        #     all_clusters[max_cluster_index].push(tweet[0], tweet[1], token)
        #     clustered = True

        if not clustered:
            new_cluster = cluster.Cluster(tweet[0], tweet[1], token,
                                          cluster_id)
            cluster_id += 1
            all_clusters.append(new_cluster)

        # print(i)

    print("Total number of clusters generated: %d" % (len(all_clusters)))
    cluster_sizes = [x.get_size() for x in all_clusters]
    print("The sizes of all clusters generated:")
    print(cluster_sizes)
    max_cluster_size = max(cluster_sizes)

    # for j in range(len(cluster_sizes)):
    #     if cluster_sizes[j] == max_cluster_size:
    #         break

    print("The max cluster size is: %d" % (max_cluster_size))
    # print("Number of tweets clustered using hashtag/url: %d" % (word_processor.hashtag_index))
    # print("Number of tweets clustered using text: %d" % (word_processor.text_index))

    # for item in all_clusters[j].get_all_tweets():
    #     print(item)

    # similarity = word_processor.docs_similarity(all_tweets[0][0], all_tweets[0][0])
    # higher similarity is better: values close to 1 mean the documents are
    # very similar, values near 0 mean they are very different

    # TODO: after clustering finishes, compute the similarity between
    # each cluster and each news article we retrieved
    F = float(sys.argv[3])
    related_news_clusters = []

    # for article in articles:
    for i in range(len(articles)):
        news_cluster_group = {}
        # max_similarity = 0
        # max_similarity_index = -1

        article = articles[i]
        text = article["title"] + article["description"]
        time = article["publish_time"]

        # for single_cluster in all_clusters:
        for j in range(len(all_clusters)):
            single_cluster = all_clusters[j]

            # Remove outlier clusters
            # if single_cluster.get_size() <= 10 or single_cluster.is_clustered:
            if single_cluster.get_size() <= 10:
                continue

            cluster_vector = single_cluster.get_vector(True)["text"]
            similarity = word_processor.docs_similarity(text, cluster_vector)
            similarity = word_processor.modified_similarity(
                similarity, time, single_cluster, True)

            # if similarity >= F and similarity > max_similarity:
            if similarity >= F:
                # max_similarity = similarity
                # max_similarity_index = j
                # The news is related to this cluster
                news_cluster_group["article"] = i
                news_cluster_group["cluster"] = single_cluster.get_id()
                related_news_clusters.append(news_cluster_group)
                # stop comparing with other clusters
                break

        # if max_similarity_index == -1:
        #     continue
        # news_cluster_group["article"] = i
        # news_cluster_group["cluster"] = max_similarity_index
        # related_news_clusters.append(news_cluster_group)
        # all_clusters[max_similarity_index].change_clustered()

    counter = {}
    for item in related_news_clusters:
        if item["cluster"] not in counter:
            counter[item["cluster"]] = 1
        else:
            counter[item["cluster"]] += 1

    # most_related_cluster = max(counter.items(), key=operator.itemgetter(1))[0]
    print("Number of pairs generated in total: %d" %
          (len(related_news_clusters)))
    print("All generated pairs:")
    print(related_news_clusters)
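Examples #4 and #7-#9 all depend on an intersection helper that is not included in this listing. Judging from how it is called, a minimal sketch (an assumption, not the project's actual code) would return the elements common to two token lists:

def intersection(list_a, list_b):
    # keep the items of list_a that also appear in list_b
    set_b = set(list_b)
    return [item for item in list_a if item in set_b]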
Code Example #5
from sys import argv  # for passing arguments from command line
import time  # for tracking time
import tweets  # contains Tweets class
# Program that calculates the median number of unique words per tweet
# Author: Jason Keung
# Created: July 4, 2015

if __name__ == "__main__":
    script, infile, outfile = argv
    start_time = time.time()
    print("Starting median_unique.py...")
    mytweet = tweets.Tweets(infile, outfile)
    while mytweet.read_tweet():
        mytweet.get_num_unique_words()
        mytweet.write_median()  # write median after EACH line is read
    mytweet.close()
    print("Output is saved to %s ") % (outfile)
    print("median_unique.py run successfully!")
    print("--- %s seconds ---\n") % (time.time() - start_time)
Code Example #6
File: test.py Project: yjlo123/SongRecommend
import tweets
import songBase
import webbrowser
import fb


t = tweets.Tweets()
base = songBase.Songbase()


def init():
    global my_name
    global my_color
    my_name = input('Enter your twitter name: ')
    my_color = t.user_to_color(my_name)
    print("Song base size: " + str(base.get_size()))
    print("==========================")
    print("Your color is: " + my_color)

def next_song():
    global my_color
    global my_url
    my_url = base.get_song(my_color, "SG", 22, [])
    print("Current song:  " + my_url)
    print("1. Next song")
    print("2. Like this song")
    webbrowser.open(my_url)

def like_song():
    global my_color
    global my_url
Code Example #7
def main():
    if int(sys.argv[5]) == 1:
        enable_time_relevancy = True
    else:
        enable_time_relevancy = False

    if int(sys.argv[6]) == 1:
        enable_hashtag_similarity = True
    else:
        enable_hashtag_similarity = False

    E = float(sys.argv[2])
    word_processor = processor.Processor(enable_hashtag_similarity)
    tweets_api = tweets.Tweets(int(sys.argv[4]))

    all_tweets = tweets_api.process_tweets(sys.argv[1])
    all_tokens = []
    copied_tweets = list(all_tweets)

    for tweet in copied_tweets:
        tokens = word_processor.tweet_tokenize(tweet[0])
        if tokens["text"] == []:
            all_tweets.remove(tweet)
            continue 
        all_tokens.append(tokens)

    all_clusters = []
    # we compute the similarity of each tweet against all existing
    # clusters, not against the other tweets
    cluster_id = 0
    for i in range(len(all_tweets)):
        # first cluster
        if all_clusters == []:
            tweet = all_tweets[i]
            token = all_tokens[i]
            new_cluster = cluster.Cluster(tweet[0], tweet[1], token, cluster_id)
            cluster_id += 1
            all_clusters.append(new_cluster)
            continue
        
        clustered = False
        # max_cluster_similarity = 0
        # max_cluster_index = -1
        token = all_tokens[i]
        # print("Tweet after processed: %s" % (token["text"]))
        # for single_cluster in all_clusters:
        for j in range(len(all_clusters)):
            single_cluster = all_clusters[j]
            vector = single_cluster.get_vector(False)
            # no common words between the tweet and the cluster, skip
            if not intersection(vector["text"], token["text"]) and \
                not intersection(vector["hashtag"], token["hashtag"]):
                continue

            # start_pre_similarity = time.time()

            new_token = {}
            new_token["text"] = " ".join(token["text"])
            new_token["hashtag"] = token["hashtag"]
            # new_token["url"] = token["url"]
            # print("Pre similarity duration: %s" % (time.time() - start_pre_similarity))

            # if all_text_in_cluster(new_token["text"], vector["text"]):
            #     similarity = 1
            # else:
            #     vector = single_cluster.get_vector(True)
            #     similarity = word_processor.new_triple_similarity(new_token, vector)
            try:
                # print("Cluster: %s" % (vector["text"]))
                vector = single_cluster.get_vector(True)
                similarity = word_processor.new_triple_similarity(new_token, vector)
            except Exception:
                # skip clusters whose vector cannot be compared with this tweet
                continue
                # print(new_token)
                # print(vector)

            if enable_time_relevancy:
                similarity = word_processor.modified_similarity(
                    similarity, all_tweets[i][1], single_cluster)

            # print("Similarity: %f" % (similarity))
            if similarity >= E:
                tweet = all_tweets[i]
                single_cluster.push(tweet[0], tweet[1], token)
                clustered = True
                break

        # if max_cluster_index != -1:
        #     all_clusters[max_cluster_index].push(tweet[0], tweet[1], token)
        #     clustered = True

        if not clustered:
            tweet = all_tweets[i]
            token = all_tokens[i]
            new_cluster = cluster.Cluster(tweet[0], tweet[1], token, cluster_id)
            cluster_id += 1
            all_clusters.append(new_cluster)
        
        # print("-----------------------------------------------------------")

    print("Total number of clusters generated: %d" % (len(all_clusters)))
    cluster_sizes = [x.get_size() for x in all_clusters]
    print("The sizes of all clusters generated:")
    print(cluster_sizes)
    max_cluster_size = max(cluster_sizes)

    # for j in range(len(cluster_sizes)):
    #     if cluster_sizes[j] == max_cluster_size:
    #         break

    print("The max cluster size is: %d" % (max_cluster_size))
    # print("Number of tweets clustered using hashtag/url: %d" % (word_processor.hashtag_index))
    # print("Number of tweets clustered using text: %d" % (word_processor.text_index))

    # for item in all_clusters[j].get_all_tweets():
    #     print(item)

    # similarity = word_processor.docs_similarity(all_tweets[0][0], all_tweets[0][0])
    # higher similarity is better: values close to 1 mean the documents are
    # very similar, values near 0 mean they are very different

    # TODO: after clustering finishes, compute the similarity between
    # each cluster and each news article we retrieved
    news_api = news.News()
    articles = news_api.process_news(news_api.retrieve_everything())
    F = float(sys.argv[3])
    related_news_clusters = []

    # for article in articles:
    for i in range(len(articles)):
        news_cluster_group = {}
        # max_similarity = 0
        # max_similarity_index = -1

        article = articles[i]
        text = article["title"] + article["description"]
        publish_time = article["publish_time"]

        # for single_cluster in all_clusters:
        for j in range(len(all_clusters)):
            single_cluster = all_clusters[j]

            # Remove outlier clusters
            if single_cluster.get_size() < 10:
                continue

            cluster_vector = single_cluster.get_vector(True)["text"]
            
            if not intersection(cluster_vector, text):
                continue

            similarity = word_processor.docs_similarity(text, cluster_vector)
            # similarity = word_processor.modified_similarity(similarity, publish_time, single_cluster, True)
            # if enable_time_relevancy:
            #     similarity = word_processor.modified_similarity(
            #         similarity, publish_time, single_cluster, True)

            # if similarity >= F and similarity > max_similarity:
            if similarity >= F:
                # max_similarity = similarity
                # max_similarity_index = j
                # The news is related to this cluster
                news_cluster_group["article"] = i
                news_cluster_group["cluster"] = single_cluster.get_id()
                news_cluster_group["similarity"] = similarity
                related_news_clusters.append(news_cluster_group)
                # stop comparing with other clusters
                break
    
    print("Number of pairs generated in total: %d" % (len(related_news_clusters)))
    print("All generated pairs:")
    print(related_news_clusters)

    for related_pair in related_news_clusters:
        print("News below")
        article_id = related_pair["article"]
        print(articles[article_id])
        cluster_id = related_pair["cluster"]
        print("Tweets below:")
        for k in range(len(all_clusters[cluster_id].get_all_tweets())):
            print("[%d]: %s: " % (k, all_clusters[cluster_id].get_all_tweets()[k]))
        print("----------------------------------------------------")
Code Example #8
def main():
    E = float(sys.argv[2])

    # glove_file = datapath('glove.twitter.27B/glove.twitter.27B.200d.txt')
    # tmp_file = get_tmpfile("tweets_word2vec.txt")
    # _ = glove2word2vec(glove_file, tmp_file)
    # model = KeyedVectors.load_word2vec_format(tmp_file)
    # model.save("tweets_word2vec.model")

    # print("model completed")

    model = KeyedVectors.load("glove.twitter.27B/tweets_word2vec.model")

    news_api = news.News()
    word_processor = processor.Processor()
    tweets_api = tweets.Tweets()
    articles = news_api.process_news(news_api.retrieve_everything())

    data = []
    for line in open(sys.argv[1]):
        data.append(json.loads(line))

    all_tweets = tweets_api.process_tweets(data)

    all_tokens = []
    copied_tweets = list(all_tweets)

    for tweet in copied_tweets:
        tokens = word_processor.tweet_tokenize(tweet[0])
        if tokens == []:
            all_tweets.remove(tweet)
            continue
        all_tokens.append(tokens)

    all_clusters = []
    cluster_id = 0
    for i in range(len(all_tweets)):
        tweet = all_tweets[i]
        token = all_tokens[i]
        # first cluster
        if all_clusters == []:
            new_cluster = cluster.Cluster(tweet[0], tweet[1], token,
                                          cluster_id)
            cluster_id += 1
            all_clusters.append(new_cluster)
            continue

        clustered = False

        for j in range(len(all_clusters)):
            single_cluster = all_clusters[j]
            vector = single_cluster.get_vector(False)
            # no common words between the tweet and the cluster, skip
            common_text_vector = intersection(vector["text"], token["text"])
            common_hashtag_vector = intersection(vector["hashtag"],
                                                 token["hashtag"])
            common_url_vector = intersection(vector["url"], token["url"])
            if common_text_vector == [] and \
                    common_hashtag_vector == [] and \
                    common_url_vector == []:
                continue

            # vector = single_cluster.get_vector(True)

            new_token = {}
            new_token["text"] = token["text"]
            new_token["hashtag"] = token["hashtag"]

            # TODO: we can check whether a word is in the pre-trained model like this:
            # for word in new_token["text"]:
            #     if word in model.wv.vocab:  # or: if word in model.vocab
            #         print(word)

            similarity = model.wv.n_similarity(new_token["text"],
                                               vector["text"])
            print(similarity)

            if similarity >= E:
                # max_cluster_similarity = similarity
                # max_cluster_index = j
                single_cluster.push(tweet[0], tweet[1], token)
                clustered = True
                break

        if not clustered:
            new_cluster = cluster.Cluster(tweet[0], tweet[1], token,
                                          cluster_id)
            cluster_id += 1
            all_clusters.append(new_cluster)

    print("Total number of clusters generated: %d" % (len(all_clusters)))
    cluster_sizes = [x.get_size() for x in all_clusters]
    print("The sizes of all clusters generated:")
    print(cluster_sizes)
    max_cluster_size = max(cluster_sizes)
    print("The max cluster size is: %d" % (max_cluster_size))

    F = float(sys.argv[3])
    related_news_clusters = []

    # for article in articles:
    for i in range(len(articles)):
        news_cluster_group = {}
        # max_similarity = 0
        # max_similarity_index = -1

        article = articles[i]
        text = article["title"] + article["description"]
        time = article["publish_time"]

        # for single_cluster in all_clusters:
        for j in range(len(all_clusters)):
            single_cluster = all_clusters[j]

            # Remove outlier clusters
            # if single_cluster.get_size() <= 10 or single_cluster.is_clustered:
            if single_cluster.get_size() <= 10:
                continue

            cluster_vector = single_cluster.get_vector(True)["text"]
            similarity = word_processor.docs_similarity(text, cluster_vector)
            similarity = word_processor.modified_similarity(
                similarity, time, single_cluster, True)

            # if similarity >= F and similarity > max_similarity:
            if similarity >= F:
                # max_similarity = similarity
                # max_similarity_index = j
                # The news is related to this cluster
                news_cluster_group["article"] = i
                news_cluster_group["cluster"] = single_cluster.get_id()
                related_news_clusters.append(news_cluster_group)
                # stop comparing with other clusters
                break

        # if max_similarity_index == -1:
        #     continue
        # news_cluster_group["article"] = i
        # news_cluster_group["cluster"] = max_similarity_index
        # related_news_clusters.append(news_cluster_group)
        # all_clusters[max_similarity_index].change_clustered()

    counter = {}
    for item in related_news_clusters:
        if item["cluster"] not in counter:
            counter[item["cluster"]] = 1
        else:
            counter[item["cluster"]] += 1

    # most_related_cluster = max(counter.items(), key=operator.itemgetter(1))[0]
    print("Number of pairs generated in total: %d" %
          (len(related_news_clusters)))
    print("All generated pairs:")
    print(related_news_clusters)
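Example #8 compares GloVe word vectors with gensim's n_similarity, which raises a KeyError when a token is missing from the pre-trained vocabulary (the TODO inside the loop hints at this). A hedged sketch of filtering out-of-vocabulary tokens first, written against the KeyedVectors API and covering both old and new gensim attribute names, is below; in the loop above it would stand in for the model.wv.n_similarity call, e.g. similarity = safe_n_similarity(model, new_token["text"], vector["text"]).

def safe_n_similarity(kv, tweet_tokens, cluster_tokens):
    # keep only tokens that exist in the pre-trained vocabulary
    def in_vocab(word):
        # gensim >= 4 exposes key_to_index; older releases expose vocab
        keys = kv.key_to_index if hasattr(kv, "key_to_index") else kv.vocab
        return word in keys

    tweet_words = [w for w in tweet_tokens if in_vocab(w)]
    cluster_words = [w for w in cluster_tokens if in_vocab(w)]
    if not tweet_words or not cluster_words:
        return 0.0  # nothing comparable, treat as dissimilar
    return kv.n_similarity(tweet_words, cluster_words)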
Code Example #9
def main():
    # Settings
    if int(sys.argv[5]) == 1:
        enable_time_relevancy = True
    else:
        enable_time_relevancy = False

    if int(sys.argv[6]) == 1:
        enable_hashtag_similarity = True
    else:
        enable_hashtag_similarity = False

    E = float(sys.argv[2])
    # model = Doc2Vec.load("./enwiki_dbow/doc2vec.bin")
    # print("Starts loading the model.")
    doc2vec_model = Doc2VecModel()
    model = doc2vec_model.get_model()
    # print("Model loaded.")
    word_processor = processor.Processor(enable_hashtag_similarity)
    tweets_api = tweets.Tweets(int(sys.argv[4]))

    # print("Starts loading the dataset")
    all_tweets = tweets_api.process_tweets(sys.argv[1])
    # print("Dataset loaded")

    all_tokens = []
    copied_tweets = list(all_tweets)

    for tweet in copied_tweets:
        tokens = word_processor.tweet_tokenize(tweet[0])

        if tokens == []:
            all_tweets.remove(tweet)
            continue
        all_tokens.append(tokens)

    # print("pre-processing completed")

    all_clusters = []
    cluster_id = 0
    for i in range(len(all_tweets)):
        # start_total = time.time()
        # first cluster
        if all_clusters == []:
            tweet = all_tweets[i]
            token = all_tokens[i]
            new_cluster = cluster.Cluster(
                tweet[0], tweet[1], token, cluster_id, True, model)
            cluster_id += 1
            all_clusters.append(new_cluster)
            continue

        clustered = False
        # print("Starts clustering %d" % (i))
        token = all_tokens[i]
        for j in range(len(all_clusters)):
            vector = all_clusters[j].get_vector(False)
            single_cluster = all_clusters[j]
            # no common words between the tweet and the cluster, skip
            if not intersection(vector["text"], token["text"]) and \
                not intersection(vector["hashtag"], token["hashtag"]):
                continue

            # vector = single_cluster.get_vector(True)
            new_token = {}
            new_token["text"] = token["text"]
            new_token["hashtag"] = token["hashtag"]

            # cluster_dbow_vector = model.infer_vector(vector["text"])
            # similarity = spatial.distance.cosine(tweet_dbow_vector, cluster_dbow_vector)
            # similarity = 1 - similarity
            # if all_text_in_cluster(new_token["text"], vector["text"]):
            #     similarity = 1
            # else:
            #     tweet_dbow_vector = model.infer_vector(new_token["text"])
            #     similarity = word_processor.doc2vec_double_similarity(new_token, vector, tweet_dbow_vector, all_clusters[j])
            tweet_dbow_vector = model.infer_vector(new_token["text"])
            similarity = word_processor.doc2vec_double_similarity(
                new_token, vector, tweet_dbow_vector, all_clusters[j])

            if enable_time_relevancy:
                similarity = word_processor.modified_similarity(
                    similarity, all_tweets[i][1], single_cluster)

            if similarity >= E:
                tweet = all_tweets[i]
                all_clusters[j].push(tweet[0], tweet[1], token)
                clustered = True
                break

        if not clustered:
            # start_new_cluster = time.time()
            tweet = all_tweets[i]
            token = all_tokens[i]
            new_cluster = cluster.Cluster(tweet[0], tweet[1], token, cluster_id, True, model)
            cluster_id += 1
            all_clusters.append(new_cluster)
            # print("New cluster duration: %s" %
            #       (time.time() - start_new_cluster))

        # print("Total time: %s" % (time.time() - start_total))
        # print("Clustering completed %d" % (i))

    print("Total number of clusters generated: %d" % (len(all_clusters)))
    cluster_sizes = [x.get_size() for x in all_clusters]
    print("The sizes of all clusters generated:")
    print(cluster_sizes)
    max_cluster_size = max(cluster_sizes)
    print("The max cluster size is: %d" % (max_cluster_size))

    news_api = news.News()
    articles = news_api.process_news(news_api.retrieve_everything())
    F = float(sys.argv[3])
    related_news_clusters = []

    # for article in articles:
    for i in range(len(articles)):
        news_cluster_group = {}
        # max_similarity = 0
        # max_similarity_index = -1

        article = articles[i]
        text = article["title"] + article["description"]
        publish_time = article["publish_time"]

        # for single_cluster in all_clusters:
        for j in range(len(all_clusters)):
            single_cluster = all_clusters[j]

            # Remove outlier clusters
            if single_cluster.get_size() < 10:
                continue
            # print("Article %d, Cluster %d." % (i, j))
            cluster_vector = single_cluster.get_vector(True)["text"]
            if not intersection(cluster_vector, text):
                continue
            similarity = word_processor.docs_similarity(text, cluster_vector)

            # if enable_time_relevancy:
            #     similarity = word_processor.modified_similarity(
            #         similarity, publish_time, single_cluster, True)

            if similarity >= F:
                news_cluster_group = {}
                # find all clusters related to the news
                # if i not in news_cluster_group.keys():
                #     news_cluster_group[i] = []
                # cluster_id = single_cluster.get_id()
                # news_cluster_group[i].append((cluster_id, similarity))
                news_cluster_group["article"] = i
                news_cluster_group["cluster"] = single_cluster.get_id()
                news_cluster_group["similarity"] = similarity
                related_news_clusters.append(news_cluster_group)
                # stop comparing with other clusters
                break

        # if news_cluster_group != {}:
        #     related_news_clusters.append(news_cluster_group)

    print("Number of pairs generated in total: %d" %
          (len(related_news_clusters)))
    print("All generated pairs:")
    print(related_news_clusters)

    # for related_pair in related_news_clusters:
    #     article_id = list(related_pair.keys())[0]
    #     print("News is below")
    #     print(articles[article_id])
    #     print("Tweets are below")
    #     cluster_list = list(related_pair.values())[0]
    #     for cluster_id, similarity in cluster_list:
    #         for k in range(len(all_clusters[cluster_id].get_all_tweets())):
    #             print("[%d]: %s: " %
    #                 (k, all_clusters[cluster_id].get_all_tweets()[k]))
    #         print("---------------------------------------------------")
    for related_pair in related_news_clusters:
        print("News below")
        article_id = related_pair["article"]
        print(articles[article_id])
        cluster_id = related_pair["cluster"]
        print("Tweets below:")
        for k in range(len(all_clusters[cluster_id].get_all_tweets())):
            print("[%d]: %s: " %
                  (k, all_clusters[cluster_id].get_all_tweets()[k]))
        print("----------------------------------------------------")