Example #1
0
def fetch_new_tweets():
    """Fetch tweets for the top worldwide trending topic(s) and build feature rows.

    Requests the trends for the worldwide WOEID, takes the first
    ``num_of_topics`` trend names, and pages through the search API for each
    one until ``total_count`` tweets were fetched or no more results come
    back.  A ``Tweet`` object is populated per status and its features are
    serialized into one ``|``-separated row.

    Returns:
        A ``(tweet_list, tweet_text_list)`` tuple.  ``tweet_list`` holds the
        ``|``-separated feature rows, ``tweet_text_list`` holds the matching
        tweet texts passed through ``smart_str``; both are in fetch order.
    """
    num_of_topics = 1
    total_count = 500
    count_per_search = 100
    WORLD_WOE_ID = 1

    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET, CONSUMER_KEY,
                               CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)

    tweet_list = []
    tweet_text_list = []

    world_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)

    # Print and collect the names of the first num_of_topics trends.
    # Slicing (instead of indexing) tolerates a shorter trend list.
    topics = []
    for trend in world_trends[0]["trends"][:num_of_topics]:
        name = trend["name"]
        print(name)
        topics.append(name)

    for topic_counter, topic in enumerate(topics, 1):
        print("topic # %d" % topic_counter)
        count_fetched = 0
        max_id = None
        while count_fetched < total_count:
            search_args = {"q": topic, "count": count_per_search}
            # Only constrain max_id once a previous page told us where to
            # continue from; the first request has no upper bound.
            if max_id is not None:
                search_args["max_id"] = max_id
            statuses = twitter_api.search.tweets(**search_args)["statuses"]
            if not statuses:
                # No more results: stop instead of looping forever.
                break

            # Per tweet processing.  Iterate over what was actually returned;
            # a page may contain fewer than count_per_search statuses.
            for status in statuses:
                t = Tweet()
                text = status["text"]
                t.text = text

                # Message based features
                t.length_tweet = len(text)
                t.num_words = len(text.split())
                t.num_unique_chars = CommonUtil.count_unique_chars(text)
                t.num_hashtags = text.count("#")
                t.retweet_cnt = status["retweet_count"]
                t.num_swear_words = CommonUtil.count_swear_words(text)
                t.num_at_emotions = text.count("@")

                # Source based features
                user_features = status["user"]
                t.registration_age = CommonUtil.count_num_days_from_today(
                    user_features["created_at"])
                t.num_followers = user_features["followers_count"]
                t.num_followee = user_features["friends_count"]
                if t.num_followee != 0:
                    # NOTE(review): under Python 2 this is integer division;
                    # confirm whether a truncated ratio is intended.
                    t.ratio_foll_followee = t.num_followers / t.num_followee
                t.is_verified = 1 if user_features["verified"] else 0
                # The profile description may be null; count it as empty.
                t.len_desc = len(user_features["description"] or "")
                t.len_screen_name = len(user_features["screen_name"])
                if user_features["url"]:
                    # Otherwise has_url keeps whatever Tweet() initialized.
                    t.has_url = 1

                # Create tweet characteristics to write to file
                feature_values = (
                    t.length_tweet, t.num_words, t.num_unique_chars,
                    t.num_hashtags, t.retweet_cnt, t.num_swear_words,
                    t.num_at_emotions, t.registration_age, t.num_followers,
                    t.num_followee, t.is_verified, t.len_desc,
                    t.len_screen_name, t.has_url,
                )
                tweet_list.append("|".join(str(v) for v in feature_values))
                tweet_text_list.append(smart_str(text))

            # max_id is inclusive, so continue strictly below the lowest id
            # seen to avoid fetching the same tweet again on the next page.
            max_id = min(status["id"] for status in statuses) - 1
            count_fetched += len(statuses)
    return tweet_list, tweet_text_list
Example #2
0
def fetch_tweets_for_topic(topic):
    """Fetch tweets matching ``topic``, dump feature rows and return the texts.

    Pages through the search API until ``total_count`` tweets were fetched or
    no more results come back.  Each tweet text is converted with
    ``smart_str`` and stripped of newlines, ``|`` and ``$`` so those
    characters can serve as delimiters.  One ``|``-separated row per tweet
    (text first, then the features) is written to ``test_tweets.txt`` in the
    current working directory, overwriting any existing file.

    Args:
        topic: Search query passed as ``q`` to the search API.

    Returns:
        A single string made of every cleaned tweet text, each one prefixed
        with ``$`` (an empty string when nothing was fetched).
    """
    total_count = 100
    count_per_search = 100
    max_id = None
    tweet_list = []
    tweet_text_list = []
    count_fetched = 0

    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET, CONSUMER_KEY,
                               CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)

    while count_fetched < total_count:
        search_args = {"q": topic, "count": count_per_search}
        # Only constrain max_id once a previous page told us where to
        # continue from; the first request has no upper bound.
        if max_id is not None:
            search_args["max_id"] = max_id
        statuses = twitter_api.search.tweets(**search_args)["statuses"]
        if not statuses:
            # No more results: stop instead of looping forever.
            break

        # Per tweet processing.  Iterate over what was actually returned;
        # a page may contain fewer than count_per_search statuses.
        for status in statuses:
            t = Tweet()
            # Remove the characters used as row/field/text delimiters.
            text = smart_str(status["text"])
            text = text.replace("\n", " ")
            text = text.replace("|", "")
            text = text.replace("$", "")
            t.text = text

            # Message based features
            t.length_tweet = len(text)
            t.num_words = len(text.split())
            t.num_unique_chars = CommonUtil.count_unique_chars(text)
            t.num_hashtags = text.count("#")
            t.retweet_cnt = status["retweet_count"]
            t.num_swear_words = CommonUtil.count_swear_words(text)
            t.num_at_emotions = text.count("@")

            # Source based features
            user_features = status["user"]
            t.registration_age = CommonUtil.count_num_days_from_today(
                user_features["created_at"])
            t.num_followers = user_features["followers_count"]
            t.num_followee = user_features["friends_count"]
            if t.num_followee != 0:
                # NOTE(review): under Python 2 this is integer division;
                # confirm whether a truncated ratio is intended.
                t.ratio_foll_followee = t.num_followers / t.num_followee
            t.is_verified = 1 if user_features["verified"] else 0
            # The profile description may be null; count it as empty.
            t.len_desc = len(user_features["description"] or "")
            t.len_screen_name = len(user_features["screen_name"])
            if user_features["url"]:
                # Otherwise has_url keeps whatever Tweet() initialized.
                t.has_url = 1

            # Create tweet characteristics to write to file
            feature_values = (
                t.length_tweet, t.num_words, t.num_unique_chars,
                t.num_hashtags, t.retweet_cnt, t.num_swear_words,
                t.num_at_emotions, t.registration_age, t.num_followers,
                t.num_followee, t.is_verified, t.len_desc,
                t.len_screen_name, t.has_url,
            )
            tweet_list.append(
                "|".join([text] + [str(v) for v in feature_values]))
            tweet_text_list.append(text)

        # max_id is inclusive, so continue strictly below the lowest id seen
        # to avoid fetching the same tweet again on the next page.
        max_id = min(status["id"] for status in statuses) - 1
        count_fetched += len(statuses)

    # Write the tweets to a file, one row per line; the context manager
    # closes the file even if a write fails.
    with open("test_tweets.txt", "w") as outfile:
        for tweet in tweet_list:
            outfile.write(tweet)
            outfile.write("\n")

    # Concatenate the tweet texts, each one prefixed with "$".
    return "".join("$" + tweet for tweet in tweet_text_list)
Example #3
0
        search_results_len = len(statuses)
        # per tweet processing
        while row_num < count_per_search:
            status = statuses[row_num]
            print status
            resp = json.dumps(status, indent=4)
            print resp
            text = status["text"]
            # Message based features
            length_tweet = len(text)
            num_words = len(text.split())
            num_unique_chars = CommonUtil.count_unique_chars(text)
            num_hashtags = text.count("#")
            retweet_cnt = status["retweet_count"]
            max_id = status["id"]
            num_swear_words = CommonUtil.count_swear_words(text)
            num_at_emotions = text.count("@")

            # Source based Features
            user_features = status["user"]
            num_followers = user_features["followers_count"]
            num_friends = user_features["friends_count"]
            is_verified = user_features["verified"]
            if is_verified:
                is_verified = 1
            else:
                is_verified = 0
            len_desc = len(user_features["description"])
            len_screen_name = len(user_features["screen_name"])
            user_url = user_features["url"]
            print "user_url", user_url
Example #4
0
def fetch_new_tweets():
    """Fetch tweets for the top worldwide trending topic(s) and build feature rows.

    Requests the trends for the worldwide WOEID, takes the first
    ``num_of_topics`` trend names, and pages through the search API for each
    one until ``total_count`` tweets were fetched or no more results come
    back.  A ``Tweet`` object is populated per status and its features are
    serialized into one ``|``-separated row.

    Returns:
        A ``(tweet_list, tweet_text_list)`` tuple.  ``tweet_list`` holds the
        ``|``-separated feature rows, ``tweet_text_list`` holds the matching
        tweet texts passed through ``smart_str``; both are in fetch order.
    """
    num_of_topics = 1
    total_count = 500
    count_per_search = 100
    WORLD_WOE_ID = 1

    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)

    tweet_list = []
    tweet_text_list = []

    world_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)

    # Print and collect the names of the first num_of_topics trends.
    # Slicing (instead of indexing) tolerates a shorter trend list.
    topics = []
    for trend in world_trends[0]["trends"][:num_of_topics]:
        name = trend["name"]
        print(name)
        topics.append(name)

    for topic_counter, topic in enumerate(topics, 1):
        print("topic # %d" % topic_counter)
        count_fetched = 0
        max_id = None
        while count_fetched < total_count:
            search_args = {"q": topic, "count": count_per_search}
            # Only constrain max_id once a previous page told us where to
            # continue from; the first request has no upper bound.
            if max_id is not None:
                search_args["max_id"] = max_id
            statuses = twitter_api.search.tweets(**search_args)["statuses"]
            if not statuses:
                # No more results: stop instead of looping forever.
                break

            # Per tweet processing.  Iterate over what was actually returned;
            # a page may contain fewer than count_per_search statuses.
            for status in statuses:
                t = Tweet()
                text = status["text"]
                t.text = text

                # Message based features
                t.length_tweet = len(text)
                t.num_words = len(text.split())
                t.num_unique_chars = CommonUtil.count_unique_chars(text)
                t.num_hashtags = text.count("#")
                t.retweet_cnt = status["retweet_count"]
                t.num_swear_words = CommonUtil.count_swear_words(text)
                t.num_at_emotions = text.count("@")

                # Source based features
                user_features = status["user"]
                t.registration_age = CommonUtil.count_num_days_from_today(
                    user_features["created_at"])
                t.num_followers = user_features["followers_count"]
                t.num_followee = user_features["friends_count"]
                if t.num_followee != 0:
                    # NOTE(review): under Python 2 this is integer division;
                    # confirm whether a truncated ratio is intended.
                    t.ratio_foll_followee = t.num_followers / t.num_followee
                t.is_verified = 1 if user_features["verified"] else 0
                # The profile description may be null; count it as empty.
                t.len_desc = len(user_features["description"] or "")
                t.len_screen_name = len(user_features["screen_name"])
                if user_features["url"]:
                    # Otherwise has_url keeps whatever Tweet() initialized.
                    t.has_url = 1

                # Create tweet characteristics to write to file
                feature_values = (
                    t.length_tweet, t.num_words, t.num_unique_chars,
                    t.num_hashtags, t.retweet_cnt, t.num_swear_words,
                    t.num_at_emotions, t.registration_age, t.num_followers,
                    t.num_followee, t.is_verified, t.len_desc,
                    t.len_screen_name, t.has_url,
                )
                tweet_list.append("|".join(str(v) for v in feature_values))
                tweet_text_list.append(smart_str(text))

            # max_id is inclusive, so continue strictly below the lowest id
            # seen to avoid fetching the same tweet again on the next page.
            max_id = min(status["id"] for status in statuses) - 1
            count_fetched += len(statuses)
    return tweet_list, tweet_text_list
Example #5
0
def fetch_tweets_for_topic(topic):
    """Fetch tweets matching ``topic``, dump feature rows and return the texts.

    Pages through the search API until ``total_count`` tweets were fetched or
    no more results come back.  Each tweet text is converted with
    ``smart_str`` and stripped of newlines, ``|`` and ``$`` so those
    characters can serve as delimiters.  One ``|``-separated row per tweet
    (text first, then the features) is written to ``test_tweets.txt`` in the
    current working directory, overwriting any existing file.

    Args:
        topic: Search query passed as ``q`` to the search API.

    Returns:
        A single string made of every cleaned tweet text, each one prefixed
        with ``$`` (an empty string when nothing was fetched).
    """
    total_count = 100
    count_per_search = 100
    max_id = None
    tweet_list = []
    tweet_text_list = []
    count_fetched = 0

    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)

    while count_fetched < total_count:
        search_args = {"q": topic, "count": count_per_search}
        # Only constrain max_id once a previous page told us where to
        # continue from; the first request has no upper bound.
        if max_id is not None:
            search_args["max_id"] = max_id
        statuses = twitter_api.search.tweets(**search_args)["statuses"]
        if not statuses:
            # No more results: stop instead of looping forever.
            break

        # Per tweet processing.  Iterate over what was actually returned;
        # a page may contain fewer than count_per_search statuses.
        for status in statuses:
            t = Tweet()
            # Remove the characters used as row/field/text delimiters.
            text = smart_str(status["text"])
            text = text.replace("\n", " ")
            text = text.replace("|", "")
            text = text.replace("$", "")
            t.text = text

            # Message based features
            t.length_tweet = len(text)
            t.num_words = len(text.split())
            t.num_unique_chars = CommonUtil.count_unique_chars(text)
            t.num_hashtags = text.count("#")
            t.retweet_cnt = status["retweet_count"]
            t.num_swear_words = CommonUtil.count_swear_words(text)
            t.num_at_emotions = text.count("@")

            # Source based features
            user_features = status["user"]
            t.registration_age = CommonUtil.count_num_days_from_today(
                user_features["created_at"])
            t.num_followers = user_features["followers_count"]
            t.num_followee = user_features["friends_count"]
            if t.num_followee != 0:
                # NOTE(review): under Python 2 this is integer division;
                # confirm whether a truncated ratio is intended.
                t.ratio_foll_followee = t.num_followers / t.num_followee
            t.is_verified = 1 if user_features["verified"] else 0
            # The profile description may be null; count it as empty.
            t.len_desc = len(user_features["description"] or "")
            t.len_screen_name = len(user_features["screen_name"])
            if user_features["url"]:
                # Otherwise has_url keeps whatever Tweet() initialized.
                t.has_url = 1

            # Create tweet characteristics to write to file
            feature_values = (
                t.length_tweet, t.num_words, t.num_unique_chars,
                t.num_hashtags, t.retweet_cnt, t.num_swear_words,
                t.num_at_emotions, t.registration_age, t.num_followers,
                t.num_followee, t.is_verified, t.len_desc,
                t.len_screen_name, t.has_url,
            )
            tweet_list.append(
                "|".join([text] + [str(v) for v in feature_values]))
            tweet_text_list.append(text)

        # max_id is inclusive, so continue strictly below the lowest id seen
        # to avoid fetching the same tweet again on the next page.
        max_id = min(status["id"] for status in statuses) - 1
        count_fetched += len(statuses)

    # Write the tweets to a file, one row per line; the context manager
    # closes the file even if a write fails.
    with open("test_tweets.txt", "w") as outfile:
        for tweet in tweet_list:
            outfile.write(tweet)
            outfile.write("\n")

    # Concatenate the tweet texts, each one prefixed with "$".
    return "".join("$" + tweet for tweet in tweet_text_list)
Example #6
0
        search_results_len = len(statuses)
        # per tweet processing
        while row_num < count_per_search:
            status = statuses[row_num]
            print status
            resp = json.dumps(status, indent=4)
            print resp
            text = status["text"]
            # Message based features
            length_tweet = len(text)
            num_words = len(text.split())
            num_unique_chars = CommonUtil.count_unique_chars(text)
            num_hashtags = text.count("#")
            retweet_cnt = status["retweet_count"]
            max_id = status["id"]
            num_swear_words = CommonUtil.count_swear_words(text)
            num_at_emotions = text.count("@")

            # Source based Features
            user_features = status["user"]
            num_followers = user_features["followers_count"]
            num_friends = user_features["friends_count"]
            is_verified = user_features["verified"]
            if is_verified:
                is_verified = 1
            else:
                is_verified = 0
            len_desc = len(user_features["description"])
            len_screen_name = len(user_features["screen_name"])
            user_url = user_features["url"]
            print "user_url", user_url