예제 #1
0
    def friends_loop(self):
        """Poll ``data/friends.txt`` forever and store friend ids for queued users.

        Each queue line is split on ``":"``; the first field is used as the
        user id and the second is handed to
        ``calculate_days_diff_from_today``. Entries whose day difference is
        greater than one are processed (friends are fetched and inserted
        unless the user document already has a ``'friends'`` key); younger
        entries are appended back to the end of the file.

        This method never returns.
        """
        database = mongoController.mongoController()
        while True:
            print("REST API PROCESSING FRIENDS")
            # get user id from file
            friends_id_line = self.file_controller.get_and_remove_first_line(
                "data/friends.txt")
            if friends_id_line == False:
                # nothing could be read from the queue: wait two minutes
                time.sleep(60 * 2)
                continue

            split_friends_id_line = friends_id_line.split(":")
            user_id = split_friends_id_line[0]
            date_difference = self.calculate_days_diff_from_today(
                split_friends_id_line[1])

            if date_difference.days > 1:
                # NOTE(review): assumes the user document exists; a None
                # result would raise TypeError on the membership test below.
                user = database.users.get_user_with_id(user_id)
                if 'friends' not in user:
                    # get users friends and insert into db.
                    # The id field must be used here: indexing the raw line
                    # with [0] would pass only its first character.
                    friends = self.rest_client.get_friends_ids(user_id)
                    database.friends.insert_friends(user_id, friends)
            else:
                # not old enough yet: re-add the entry to the bottom of the file
                self.file_controller.append_one_line("data/friends.txt",
                                                     friends_id_line)
예제 #2
0
    def retweets_group(self, group):
        """Build a retweeter -> {author idd -> count} map for one group.

        Only tweets that have a ``'retweets'`` key and whose ``'group'``
        equals ``group`` are considered. Every entry of ``tweet['retweets']``
        carries its own ``'retweets'`` list; each item of that inner list
        contributes one count from ``item['user']`` to the tweet owner's
        ``'idd'``.
        """
        database = mongoController.mongoController()
        retweet_map = {}

        for user in database.users.get_all_users():
            if 'tweets' not in user:
                continue
            for tweet in user['tweets']:
                if 'retweets' not in tweet:
                    continue
                if 'group' not in tweet or tweet['group'] != group:
                    continue
                for retweet in tweet['retweets']:
                    for single_retweet in retweet['retweets']:
                        # one inner dict per retweeting user, created on demand
                        per_author = retweet_map.setdefault(
                            single_retweet['user'], {})
                        author_id = user['idd']
                        per_author[author_id] = per_author.get(author_id, 0) + 1

        return retweet_map
예제 #3
0
    def quotes_group(self, group):
        """Build a quoting user idd -> {quoted user -> count} map for one group.

        A tweet is counted when it has a ``'quote_status'`` key and its
        ``'group'`` equals ``group``; the quoted user is read from
        ``tweet['quote_user']``.
        """
        database = mongoController.mongoController()
        quote_map = {}

        for user in database.users.get_all_users():
            if 'tweets' not in user:
                continue
            for tweet in user['tweets']:
                if 'quote_status' not in tweet:
                    continue
                if 'group' not in tweet or tweet['group'] != group:
                    continue
                # inner dict of quoted users for this author, created on demand
                quoted_counts = quote_map.setdefault(user['idd'], {})
                quoted_user = tweet['quote_user']
                quoted_counts[quoted_user] = quoted_counts.get(quoted_user, 0) + 1

        return quote_map
예제 #4
0
    def retweet_loop(self):
        """Poll ``data/retweets.txt`` forever and store retweets of queued tweets.

        A queue line is split on ``':'`` into a tweet id and a value passed to
        ``calculate_days_diff_from_today``. Entries with a day difference
        greater than one are fetched, parsed and inserted; all others are
        appended back to the end of the file. This method never returns.
        """
        database = mongoController.mongoController()
        while True:
            print("REST API PROCESSING RETWEETS")
            # take the next queued entry off the top of the file
            queued_line = self.file_controller.get_and_remove_first_line(
                "data/retweets.txt")
            if queued_line == False:
                # nothing could be read: wait two minutes and poll again
                time.sleep(60 * 2)
                continue

            fields = queued_line.split(':')
            tweet_id = fields[0]
            age = self.calculate_days_diff_from_today(fields[1])

            if age.days <= 1:
                # too recent: push the entry back to the bottom of the file
                self.file_controller.append_one_line("data/retweets.txt",
                                                     queued_line)
                continue

            # ready to be processed: fetch, parse and store the retweets
            raw_retweets = self.rest_client.get_retweets(tweet_id)
            parsed = self.retweet_parser.parse_retweet_chain(
                tweet_id, raw_retweets)
            database.retweets.insert_retweets(tweet_id, parsed)
예제 #5
0
    def parse_tweets(self, final_tweets_list, json_tweet):
        """Recursively convert a tweet payload and append results to a list.

        Tweets already known to the database are skipped. For a new tweet the
        author is created when no user document is found. A retweet is not
        stored itself: its ``'retweeted_status'`` is parsed instead and the
        original tweet's id is queued in ``data/retweets.txt`` together with
        today's date. A non-retweet is converted with
        ``mongo_tweet_structure``; a ``'quoted_status'``, if present, is
        parsed first, so it lands in the list before the quoting tweet.

        Returns ``final_tweets_list`` (the same list that was passed in).
        """
        print("PARSING TWEET")
        file_controller = fileController.fileController()
        database = mongoController.mongoController()

        author_id = json_tweet['user']['id']

        # skip if repeated tweet
        if database.tweets.get_tweet_exists(author_id, json_tweet['id']):
            return final_tweets_list

        # if user not in db - add them
        if not database.users.get_user_with_id(author_id):
            self.create_new_user(database, json_tweet['user'])

        if 'retweeted_status' in json_tweet:
            # a retweet: parse the original tweet and queue it for retweet lookup
            original_tweet = json_tweet['retweeted_status']
            self.parse_tweets(final_tweets_list, original_tweet)
            retweet_line = ':'.join(
                (str(original_tweet['id']), str(datetime.date.today())))
            file_controller.append_one_line("data/retweets.txt", retweet_line)
            return final_tweets_list

        mongo_tweet = self.mongo_tweet_structure(json_tweet)
        # parse the quoted tweet if exists
        if 'quoted_status' in json_tweet:
            self.parse_tweets(final_tweets_list, json_tweet['quoted_status'])
        final_tweets_list.append(mongo_tweet)

        return final_tweets_list
예제 #6
0
    def kmeans(self):
        """Cluster all stored tweets with TF-IDF + k-means and save the groups.

        Documents and their ids come from ``self.process()``. The texts are
        vectorised with English stop words removed, clustered into
        ``self.n_clusters`` groups, and every tweet's predicted cluster is
        written with ``database.tweets.insert_group_to_tweet``.

        Returns ``None``. Does nothing beyond the first log line when there
        are no documents.
        """
        print("PROCESSING GROUPS")
        database = mongoController.mongoController()
        # get all tweets
        documents, ids = self.process()
        if not documents:
            # fit_transform raises ValueError on an empty corpus
            return

        # turn tweets into TF-IDF representation
        vectorizer = TfidfVectorizer(
            stop_words=nltk.corpus.stopwords.words('english'))
        X = vectorizer.fit_transform(documents)

        model = KMeans(n_clusters=self.n_clusters,
                       init='k-means++',
                       max_iter=10000,
                       n_init=1)
        model.fit(X)

        # NOTE: the header is kept so console output is unchanged, but the
        # top terms were never actually printed. The unused
        # `get_feature_names()` / centroid-ordering code was dropped because
        # that method no longer exists in scikit-learn >= 1.2.
        print("Top terms per cluster:")

        # Predict every document in one batch; X already holds the same
        # TF-IDF rows that transforming each document separately would give.
        predictions = model.predict(X)
        total = len(documents)

        # Assign groups to each tweet and insert into database
        for i, (tweet_id, group) in enumerate(zip(ids, predictions)):
            print("Processing Kmeans for tweet " + str(i) + "/" + str(total))
            database.tweets.insert_group_to_tweet(tweet_id, group)
예제 #7
0
 def total_tweets(self):
     """Return the number of tweets across all entries of ``get_all_tweets()``.

     Every item found under an entry's ``'tweets'`` key counts once.
     """
     database = mongoController.mongoController()
     return sum(
         1
         for user_entry in database.tweets.get_all_tweets()
         for _ in user_entry['tweets']
     )
예제 #8
0
    def total_retweets(self):
        """Return the summed length of ``'retweets'`` over all stored tweets.

        Tweets without a ``'retweets'`` key contribute nothing.
        """
        database = mongoController.mongoController()
        return sum(
            len(tweet['retweets'])
            for user_entry in database.tweets.get_all_tweets()
            for tweet in user_entry['tweets']
            if 'retweets' in tweet
        )
예제 #9
0
    def total_quotes(self):
        """Return how many stored tweets carry a ``'quote_status'`` key."""
        database = mongoController.mongoController()
        return sum(
            1
            for user_entry in database.tweets.get_all_tweets()
            for tweet in user_entry['tweets']
            if 'quote_status' in tweet
        )
예제 #10
0
    def total_retweets_group(self, group):
        """Return the summed length of ``'retweets'`` for tweets in ``group``.

        A tweet counts only when it has a ``'group'`` equal to ``group`` and
        also has a ``'retweets'`` key.
        """
        database = mongoController.mongoController()
        return sum(
            len(tweet['retweets'])
            for user_entry in database.tweets.get_all_tweets()
            for tweet in user_entry['tweets']
            if 'group' in tweet and tweet['group'] == group
            and 'retweets' in tweet
        )
예제 #11
0
 def total_tweets_group(self, group):
     """Return how many stored tweets have a ``'group'`` equal to ``group``."""
     database = mongoController.mongoController()
     return sum(
         1
         for user_entry in database.tweets.get_all_tweets()
         for tweet in user_entry['tweets']
         if 'group' in tweet and tweet['group'] == group
     )
예제 #12
0
    def total_quote_group(self, group):
        """Return how many tweets in ``group`` carry a ``'quote_status'`` key."""
        database = mongoController.mongoController()
        return sum(
            1
            for user_entry in database.tweets.get_all_tweets()
            for tweet in user_entry['tweets']
            if 'group' in tweet and tweet['group'] == group
            and 'quote_status' in tweet
        )
예제 #13
0
 def process(self):
     """Collect cleaned tweet texts and their ids from the database.

     For every tweet, whitespace-separated words of three characters or
     fewer are dropped, the remaining words are re-joined with single
     spaces, and anything matching ``http\\S+`` is then removed.

     Returns a ``(documents, ids)`` pair of parallel lists, where ``ids``
     holds each tweet's ``'idd'`` value.
     """
     database = mongoController.mongoController()
     documents = []
     ids = []
     for user_entry in database.tweets.get_all_tweets():
         for ind_tweet in user_entry['tweets']:
             long_words = (word for word in ind_tweet['text'].split()
                           if len(word) > 3)
             documents.append(re.sub(r'http\S+', '', ' '.join(long_words)))
             ids.append(ind_tweet['idd'])
     return documents, ids
예제 #14
0
    def parse_followers(self, followers):
        """Return the ids of ``followers``, creating unknown users on the way.

        Each follower's ``_json`` attribute is round-tripped through the
        ``json`` module. When ``get_user_exists`` reports the id as unknown,
        the follower is parsed with ``self.user_parser`` and inserted.
        """
        database = mongoController.mongoController()
        follower_ids = []
        for follower in followers:
            # Parse into json (serialise and load back to get plain data)
            json_follower = json.loads(json.dumps(follower._json))

            # does user exist? Create user if not
            if not database.users.get_user_exists(json_follower['id']):
                database.users.insert_a_user(
                    self.user_parser.parse_user(json_follower))

            follower_ids.append(json_follower['id'])

        return follower_ids
예제 #15
0
    def timeline_loop(self):
        """Poll ``data/users.txt`` forever and store queued users' timelines.

        The whole queue line is passed to ``get_tweets_from_user``. The result
        is parsed with ``parse_rest_tweet_chain`` and every non-empty item of
        the parsed result is inserted. This method never returns.
        """
        database = mongoController.mongoController()
        while True:
            print("REST API PROCESSING USERS TIMELINE")
            # take the next queued entry off the top of the file
            queued_line = self.file_controller.get_and_remove_first_line(
                "data/users.txt")
            if queued_line == False:
                # nothing could be read: wait two minutes and poll again
                time.sleep(60 * 2)
                continue

            timeline = self.rest_client.get_tweets_from_user(queued_line)

            # parse and insert tweets
            parsed_chains = self.tweet_parser.parse_rest_tweet_chain(timeline)
            if parsed_chains is None:
                continue
            for chain in parsed_chains:
                if len(chain) > 0:
                    database.tweets.insert_tweets(chain)
예제 #16
0
    def followers_loop(self):
        """Poll ``data/followers.txt`` forever and store queued users' followers.

        A queue line is split on ``':'`` into a user id and a value passed to
        ``calculate_days_diff_from_today``. Entries with a day difference
        greater than one are processed; others are appended back to the file.
        Users whose document already has a ``'followers'`` key are skipped.

        Two REST calls are used alternately, starting with ``get_followers``
        (whose result goes through ``follower_parser.parse_followers``) and
        switching to ``get_followers_ids`` after each successful insert.
        This method never returns.
        """
        database = mongoController.mongoController()
        use_get_followers = True
        while True:
            print("REST API PROCESSING FOLLOWERS")
            # take the next queued entry off the top of the file
            queued_line = self.file_controller.get_and_remove_first_line(
                "data/followers.txt")
            if queued_line == False:
                # nothing could be read: wait two minutes and poll again
                time.sleep(60 * 2)
                continue

            fields = queued_line.split(':')
            user_id = fields[0]
            age = self.calculate_days_diff_from_today(fields[1])

            if age.days <= 1:
                # too recent: push the entry back to the bottom of the file
                self.file_controller.append_one_line("data/followers.txt",
                                                     queued_line)
                continue

            user = database.users.get_user_with_id(user_id)
            if 'followers' in user:
                continue

            # switch between both follower API's
            if use_get_followers:
                raw_followers = self.rest_client.get_followers(user_id)
                all_followers = self.follower_parser.parse_followers(
                    raw_followers)
            else:
                all_followers = self.rest_client.get_followers_ids(user_id)

            # insert followers to db
            database.followers.insert_followers(user_id, all_followers)

            # use the other API for the next processed user
            use_get_followers = not use_get_followers
예제 #17
0
    def hashtags_groups(self, group):
        """Build a hashtag -> {tweet user -> count} map for tweets in ``group``.

        Only tweets whose ``'group'`` equals ``group`` are considered; each
        hashtag in ``tweet['hashtags']`` adds one count for ``tweet['user']``.
        """
        database = mongoController.mongoController()
        hashtag_map = {}

        for user in database.users.get_all_users():
            if 'tweets' not in user:
                continue
            for tweet in user['tweets']:
                if 'group' not in tweet or tweet['group'] != group:
                    continue
                for hashtag in tweet['hashtags']:
                    # inner dict of users for this hashtag, created on demand
                    per_user = hashtag_map.setdefault(hashtag, {})
                    tweet_user = tweet['user']
                    per_user[tweet_user] = per_user.get(tweet_user, 0) + 1

        return hashtag_map
예제 #18
0
    def hashtags(self):
        """Build a hashtag -> {tweet user -> count} map over all stored tweets.

        Users without a ``'tweets'`` key are filtered out first; each hashtag
        in ``tweet['hashtags']`` then adds one count for ``tweet['user']``.
        """
        database = mongoController.mongoController()
        users_with_tweets = [
            user for user in database.users.get_all_users()
            if 'tweets' in user
        ]

        hashtag_map = {}
        for user in users_with_tweets:
            for tweet in user['tweets']:
                for hashtag in tweet['hashtags']:
                    # inner dict of users for this hashtag, created on demand
                    per_user = hashtag_map.setdefault(hashtag, {})
                    tweet_user = tweet['user']
                    per_user[tweet_user] = per_user.get(tweet_user, 0) + 1
        return hashtag_map
예제 #19
0
    def replies(self):
        """Build a replying user idd -> {replied-to user -> count} map.

        A tweet is counted when it has a ``'response_status'`` key; the
        replied-to user is read from ``tweet['response_user']``.
        """
        database = mongoController.mongoController()
        reply_map = {}

        for user in database.users.get_all_users():
            if 'tweets' not in user:
                continue
            for tweet in user['tweets']:
                if 'response_status' not in tweet:
                    continue
                # inner dict of replied-to users for this author, created on demand
                per_target = reply_map.setdefault(user['idd'], {})
                target = tweet['response_user']
                per_target[target] = per_target.get(target, 0) + 1

        return reply_map
예제 #20
0
        )
    # Command dispatch: acts only when exactly one command-line argument was
    # given (len(sys.argv) == 2); any other argument count does nothing here.
    # Each command is checked with an independent `if` rather than `elif`.
    if len(sys.argv) == 2:
        # 'run': streamer and REST API together
        if (sys.argv[1]) == 'run':
            print("RUNNING STREAMER AND REST API")
            run()
        # 'run-stream': streamer only
        if (sys.argv[1]) == 'run-stream':
            print("RUNNING STREAMER")
            stream()
        # 'run-api': REST API only
        if (sys.argv[1]) == 'run-api':
            print("RUNNING REST API")
            api()
        # 'process': run process_results() between two log lines
        if (sys.argv[1]) == 'process':
            print("PROCESSING TWEETS")
            process_results()
            print("TWEETS PROCESSED")
        # 'status': call status()
        if (sys.argv[1]) == 'status':
            print("PRINTING STATUS")
            status()
        # 'purge': remove all users, then print what get_all_users() returns
        # afterwards
        if (sys.argv[1] == 'purge'):
            print("PURGING DATABASE")
            database = mongoController.mongoController()
            database.users.remove_all_users()
            print(database.users.get_all_users())
            print("PURGED DATABASE")
        # 'manual': call manual()
        if (sys.argv[1] == "manual"):
            manual()

    ## Write to file for debug
    # file_controller = fileController.fileController()
    # file_controller.write_data_to_file("test.json", database.users.get_all_users())
예제 #21
0
 def __init__(self):
     """Create the tweet parser, user parser and Mongo controller used by this object."""
     self.tweet_parser = tweetParser.tweetParser()
     self.user_parser = userParser.userParser()
     self.database = mongoController.mongoController()