Example #1
def get_follower_ids(tweepy_obj):
    ids_list = []
    # Building a Cursor makes no API call, so it does not need a try/except
    c = Cursor(api.followers_ids, tweepy_obj.id)
    pages = c.pages()  # create the page iterator once, outside the loop
    while True:
        try:
            print 'new page...'
            page = pages.next()
            sleep(2)
        except TweepError as e:
            print 'tweepy breaks!'
            print e
            continue
        except StopIteration:
            print 'done with', tweepy_obj.id
            break
        ids_list.extend(page)
        
    try:
        print "the remaining hit is ", \
            api.rate_limit_status()['remaining_hits']
    except TweepError as e:
        print "tweepy breaks!"
        print e
    return ids_list
def findTweets(keywords):
    authentication = OAuthHandler(consumer_key, consumer_secret)
    authentication.set_access_token(access_token, access_token_secret)
    api = API(authentication)
    cursor = Cursor(api.search, q=keywords).items()
    while True:
        try:
            tweet = cursor.next()
            print ','.join([str(tweet.created_at), tweet.lang, tweet.text]).encode('utf-8')
        except StopIteration:
            break
Example #3
    def testcursornext(self):
        """
        Test cursor.next() behavior, id being passed correctly.
        Regression test for issue #518
        """
        cursor = Cursor(self.api.user_timeline, id='twitter').items(5)
        status = cursor.next()

        self.assertEqual(status.user.screen_name, 'twitter')
Example #4
def query_tweets(api, query, limit=None, languages=None):
    """
    Queries twitter REST API for tweets matching given twitter search 'query'.
    Takes an authenticated api object (API or APIPool), a query string, an optional
    limit for number of tweets returned, and an optional list of languages to
    further filter results.
    Returns a cursor (iterator) over Tweepy status objects (not native JSON docs)
    """
    cursor = Cursor(api.search, q=query, include_entities=True, lang=languages)
    if limit:
        return cursor.items(_check_limit(limit))
    return cursor.items()
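# A minimal usage sketch for query_tweets, assuming `api` is an already-authenticated
# tweepy.API (or APIPool) instance and that the _check_limit helper referenced above
# is defined elsewhere in the module. The query string and limit are placeholders.
for tweet in query_tweets(api, "python", limit=100, languages="en"):
    print(tweet.created_at, tweet.text)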
def get_tweets_by_cursor(query):
    api = API(auth)
    query = query + " -RT"
    cursor = Cursor(api.search, q=query, lang="en").items(5000)
    while True:
        try:
            tweet = cursor.next()
            print(tweet._json)
            database.tweets.insert(tweet._json)
        except TweepError:
            time.sleep(60 * 15)
            continue
        except StopIteration:
            break
    def get_friends(self, user_id):
        logger.log(LOG_LEVEL, 'Getting friends for {}'.format(user_id))
        friends = []

        cursor = Cursor(
            self.api.friends,
            user_id=user_id
        )

        for user in cursor.items():
            if self.is_potential_target(user):
                friends.append(str(user.id))

        return friends
    def get_tweets(self, user_id):
        logger.log(LOG_LEVEL, 'Getting tweets for {}'.format(user_id))
        statuses = []

        cursor = Cursor(
            self.api.user_timeline,
            user_id=user_id,
            count=PAGE_COUNT
        )

        for status in cursor.items(TWEET_COUNT):
            statuses.append(self.parse_api_tweet(status))

        return statuses
Example #8
def save_user_followers(user):
    # Building a Cursor makes no API call, so it does not need a try/except
    c = Cursor(api.followers, user.user_id)
    pages = c.pages()  # create the page iterator once, outside the loop
    page_num = 0
    while True:
        try:
            print 'taking a rest before moving to the next page'
            sleep(10)
            page = pages.next()
            page_num += 1
            print "start a new page of user ", user.scrn_name, \
                'page', page_num
        except TweepError as e:
            print "tweepy breaks!"
            print e
            continue
        except StopIteration:
            print "Move to next unscanned"
            break
        
        for tweepy_user in page:
            print "follower -----", tweepy_user.screen_name, "----- found......"
            if TwitterUser.get_by_id(tweepy_user.id) or \
                is_in_no_chn(tweepy_user.id):
                print 'ALREADY in DB!!, skip'
                continue
            try:
                if not tweepy_user.protected or \
                        (tweepy_user.protected and tweepy_user.following):
                    if is_chn(tweepy_user):
                        print "and speaks Chinese! Saving...."
                        TwitterUser.save_tweepy_user(tweepy_user)
                    else:
                        save_non_chn(tweepy_user.id)
                        print "pity, s/he is not a Chinese speaker, next..."
                        continue
            except TweepError as e:
                print "tweepy breaks!"
                print e
            try:
                print "the remaining hit is ", \
                    api.rate_limit_status()['remaining_hits']
            except TweepError as e:
                print "tweepy breaks!"
                print e
        page =[]
    user.update_scanned()
def get_tweets_for_feature_extraction(query, count):
    api = API(auth)
    query = query + " -RT"
    cursor = Cursor(api.search, q=query, lang="en").items(count)
    tweets = []
    while True:
        try:
            tweet = cursor.next()
            tweets.append(tweet._json)
        except TweepError as e:
            print(e)
            time.sleep(60 * 5)
            continue
        except StopIteration:
            break
    return tweets
Example #10
def user_tweets(api, user_id=None, screen_name=None, limit=None):
    """
    Queries Twitter REST API for user's tweets. Returns as many as possible, or
    up to given limit.
    Takes an authenticated API object (API or APIPool), one of user_id or screen_name 
    (not both), and an optional limit for number of tweets returned.
    Returns a cursor (iterator) over Tweepy status objects
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    if user_id:
        cursor = Cursor(api.user_timeline, user_id=user_id)
    elif screen_name:
        cursor = Cursor(api.user_timeline, screen_name=screen_name)
    if limit:
        return cursor.items(_check_limit(limit))
    return cursor.items()
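# A minimal usage sketch, assuming `api` is an authenticated tweepy.API or APIPool
# instance and _check_limit is the helper referenced above. "twitter" is a placeholder
# screen name.
recent = user_tweets(api, screen_name="twitter", limit=50)
for tweet in recent:
    print(tweet.id, tweet.text)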
def get_followers_ids(api, user_id):
    """
    Given a Tweepy/smappPy TweepyPool api, query twitter's rest API for followers of
    given user_id. Returns IDs only (much faster / more per request).
    Parameters:
        api     - fully authenticated Tweepy api or smappPy TweepyPool api
        user_id - twitter user id
    Returns tuple: return code, list of IDs or None (if API call fails)
    """
    cursor = Cursor(api.followers_ids, user_id=user_id)
    user_list, ret_code = call_with_error_handling(list, cursor.items())

    if ret_code != 0:
        logger.warning("User {0}: Followers request failed".format(user_id))

    # Return user list from API or None (call_with_error_handling returns None if
    # call fail)
    return ret_code, user_list
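# A minimal usage sketch, assuming `api` is an authenticated Tweepy/smappPy api object.
# The function returns a (return_code, id_list) tuple, so callers should check the
# code before using the list; the user id below is only a placeholder.
ret_code, follower_ids = get_followers_ids(api, user_id=783214)
if ret_code == 0 and follower_ids is not None:
    print("fetched {0} follower ids".format(len(follower_ids)))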
Example #12
def limit_handled(cursor: tweepy.Cursor):
    """Wrap cursor access with rate limiting

    :param cursor: The cursor to siphon
    :returns: Cursor items

    """
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            time.sleep(15 * 60)
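# A minimal usage sketch, assuming `api` is an authenticated tweepy.API instance and
# tweepy is imported as above: wrapping the item cursor with limit_handled makes a
# RateLimitError pause the iteration instead of propagating.
for status in limit_handled(tweepy.Cursor(api.user_timeline, screen_name="twitter").items(200)):
    print(status.text)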
Example #13
def insert_user_with_friends(graph_db, twitter_user,user_labels=[]):
    user_labels.append("SeedNode")
    if isinstance(twitter_user, basestring):
        try:
            twitter_user = api.get_user(twitter_user)
        except tweepy.TweepError:
            # most likely a rate limit: wait it out, then retry the lookup
            time.sleep(60 * 16)
            twitter_user = api.get_user(twitter_user)
    create_or_get_node(graph_db, twitter_user, user_labels)
    friend_count = 0
    print u"\nINSERTING FOR: {}".format(twitter_user.name)
    friends = Cursor(api.friends, user_id=twitter_user.id_str, count=200).items()
    try:
        while True:
            try:
                friend = friends.next()
            except tweepy.TweepError:
                print "exceeded rate limit. waiting"
                time.sleep(60 * 16)
                friend = friends.next()
            
            #print u"    INSERTING: {}".format(friend.name)
            friend_count += 1
            sys.stdout.write('.')
            if(friend_count%10 == 0): sys.stdout.write(' ')
            if(friend_count%50 == 0): sys.stdout.write('| ')
            if(friend_count%100 == 0): print

            
            create_or_get_node(graph_db, friend)
            query_string = """
                MATCH (user:User {id_str:{user_id_str}}),(friend:User {id_str:{friend_id_str}})
                CREATE UNIQUE (user)-[:FOLLOWS]->(friend)
                """
            data = {"user_id_str": twitter_user.id_str,
                    "friend_id_str": friend.id_str}
            n = graph_db.cypher.execute(query_string, data)

    except StopIteration:
        print u"\n    Total Friend Count = {}".format(friend_count)
def ensure_users_edges_in_db(user, edges_collection, twitter_api):
    "Looks up a user's friends_ids and followers_ids on the twitter api, and stores the edges in db."

    logging.info(".. Fetching followers_ids for user {0}.".format(user['id']))
    logging.info(".... user has {0} followers.".format(user['followers_count']))
    cursor = Cursor(twitter_api.followers_ids, id=user['id'])
    edges = [{ 'from' : follower_id,
               'to'   : user['id']}
            for follower_id in cursor.items()]
    store_edges(edges_collection, edges)
    followers_ids = [edge['from'] for edge in edges]

    logging.info(".. Fetching friends_ids for user {0}.".format(user['id']))
    logging.info(".... user has {0} friends.".format(user['friends_count']))
    cursor = Cursor(twitter_api.friends_ids, id=user['id'])
    edges = [{ 'to'   : friend_id,
               'from' : user['id']}
            for friend_id in cursor.items()]
    store_edges(edges_collection, edges)
    friends_ids = [edge['to'] for edge in edges]

    return friends_ids, followers_ids
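# A minimal usage sketch, assuming `edges_collection` is whatever store_edges expects
# (for example a MongoDB collection), `twitter_api` is an authenticated Tweepy api,
# and the user dict carries the keys read above; the values here are placeholders.
user = {'id': 783214, 'followers_count': 0, 'friends_count': 0}
friends_ids, followers_ids = ensure_users_edges_in_db(user, edges_collection, twitter_api)
print(len(friends_ids), len(followers_ids))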
Example #15
def user_tweets(api, user_id=None, screen_name=None, limit=None, **kwargs):
    """
    Queries Twitter REST API for user's tweets. Returns as many as possible, or
    up to given limit.
    Takes an authenticated API object (API or APIPool), one of user_id or screen_name 
    (not both), and an optional limit for number of tweets returned.
    Returns a cursor (iterator) over Tweepy status objects.

    Also takes variable collection of keyword argument to pass on to
    Tweepy/APIPool query methods, to support full API call parameterization.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    if user_id:
        cursor = Cursor(api.user_timeline, user_id=user_id, count=200,
            **kwargs)
    elif screen_name:
        cursor = Cursor(api.user_timeline, screen_name=screen_name,
            count=200, **kwargs)
    if limit:
        return cursor.items(_check_limit(limit))
    return cursor.items()
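# A minimal usage sketch, assuming `api` is an authenticated tweepy.API or APIPool
# instance: the **kwargs pass-through lets callers forward any user_timeline
# parameter, e.g. exclude_replies or include_rts.
for tweet in user_tweets(api, screen_name="twitter", limit=20,
                         exclude_replies=True, include_rts=False):
    print(tweet.created_at, tweet.text)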
Example #16
    'BBCWorld', 'NHKWORLD_News', 'trtworld', 'cnni', 'foxnews', 'dwnews',
    'ajenglish', 'FRANCE24', 'rt_com', 'cgtnofficial'
]
names = [
    'bbc', 'nhk', 'trt', 'cnn', 'foxnews', 'dw', 'aj', 'fr24', 'rt', 'cgtn'
]
accounts = accounts + names
tw = pd.DataFrame()
lists = [bbc, nhk, trt, cnn, foxnews, dw, aj, fr24, rt, cgtn]
accounts = accounts + lists

# Here I request tweets with a Cursor object and collect the favourite count,
# retweet count and text of each tweet per account. This can be slow and may hit
# rate limits, since it issues many requests.
columns = {}
for i in accounts:
    timeline = [
        k for j in Cursor(api.user_timeline, screen_name=i).pages() for k in j
    ]
    columns['fav_' + i] = pd.Series([k.favorite_count for k in timeline])
    columns['rt_' + i] = pd.Series([k.retweet_count for k in timeline])
    columns[i] = pd.Series([k.text for k in timeline])
# building from a dict of Series pads the shorter columns with NaN
tw = pd.DataFrame(columns)

# Then I reduce the DataFrame to 3000 rows
tw = tw.iloc[:3000, :]

# To manipulate the text data, I store the text columns' index numbers in a variable
text_places = [2, 5, 8, 11, 14, 17, 20, 23, 26, 29]
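# A small illustrative sketch, assuming the DataFrame above was built with three
# columns per account so that text_places points at the text columns: iloc can then
# pull just those columns out for cleaning.
tweet_texts = tw.iloc[:, text_places]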
from tweepy import Stream
from tweepy import API
from tweepy import Cursor
from tweepy import OAuthHandler
from datetime import datetime, date, time, timedelta
from twitter_authentication_keys import get_account_credentials
import pprint
import random
import time
import hashlib
import base64
import sys
import re

if __name__ == '__main__':
    consumer_key, consumer_secret, access_token, access_token_secret = get_account_credentials(
    )
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    auth_api = API(auth)
    print "Signing in as: " + auth_api.me().name

    for status in Cursor(auth_api.user_timeline, id="@r0zetta").items():
        if (re.search("Android", status.source)):
            source = "Android"
        elif (re.search("iPhone", status.source)):
            source = "iPhone"
        else:
            source = status.source

        print source
Example #18
auth = tA.authorization()
api = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

#Obtain Geo Code Location of Palo Alto California
#places = api.geo_search(query="USA", granularity="country")
#places = api.geo_search(query="Washington DC", granularity="city")
#place_id = places[0].id

#preventiveString, riskString, elderlyString, sentiments, misc = gA.returnSearchString()

searchString = placeSearch[
    index] + ' #COVID-19 OR "COVID-19" OR "pandemic" OR "Corona"'

cursor = Cursor(api.search,
                q=searchString,
                count=20,
                lang="en",
                tweet_mode='extended')
api = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

maxCount = 1000
count = 0

for tweet in cursor.items():
    count += 1
    fileName = "tweets_" + str(index) + "_" + str(
        datetime.datetime.now().date()).replace('-', '_')
    file = open(outputPath + fileName + '.txt', 'a')
    file.write(str(tweet) + '\n')
    print(count, end='')
    backline()
    (fullname,screenname)=line.split('\t')
    if fullname=='Name': continue
    if screenname in members: continue
    members.append(screenname)

# open file for storing retweets number  
out=open("retweetsby.txt",'w')

#store error message
error_log = open('error_log.txt', 'w')

#retrieve tweets member by member
for member in members:
    print member
    try:
        c = Cursor(api.user_timeline, member)
        pages = c.pages()  # create the page iterator once, outside the loop
        tweets = []
        time_is_right = True
        while time_is_right:
            try:
                page = pages.next()
                tweets.extend(page)
                month = page[-1].created_at.month
                year = page[-1].created_at.year

                if year < 2012:
                    time_is_right = False
            except StopIteration:
                break
        time.sleep(1)
    # error capture: store the error message in the log file
    # (TweepError is assumed to be imported from tweepy in the elided header)
    except TweepError as e:
        error_log.write(member + '\t' + str(e) + '\n')
 def testidcursorpages(self):
     pages = list(Cursor(self.api.user_timeline, count=1).pages(2))
     self.assertEqual(len(pages), 2)
Example #21
 def get_friend_list(self, num_friends):
     friend_list = []
     for friend in Cursor(self.twitter_client.friends,
                          id=self.twitter_user).items(num_friends):
         friend_list.append(friend)
     return friend_list
Example #22
 def get_search_cursor(self, q):
     temp = []
     for tweets in Cursor(self.api.search, q=q, count=200).pages(100):
         for gg in tweets:
             temp.append(gg._json)
     return temp
 def get_home_timeline_tweets(self, num_tweets):
     home_timeline_tweets = []
     for tweet in Cursor(
             self.twitter_client.home_timeline).items(num_tweets):
         home_timeline_tweets.append(tweet)
     return home_timeline_tweets
Example #24
def get_info(twitter_user):
    # screen_name = sys.argv[1]
    screen_name = twitter_user
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)

    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory{}".format(dirname))
        print(e)
        sys.exit(1)

    print('Extracting {} \n'.format(screen_name))

    # get followers for a given user
    fjson = "users/{}/followers.jsonl".format(screen_name)
    fcsv = "users/{}/followers.csv".format(screen_name)

    with open(fjson, 'w') as f1, open(fcsv, 'w') as f2:
        # create the CSV writer and write the header row once, not once per chunk
        writer = csv.writer(f2)
        writer.writerow(["id", "screen_name"])
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)

                # out = [[user.created_at, user.id, user.screen_name,user.name, user.description, user.location] for user in users]
                out = [[user.id, user.screen_name] for user in users]
                writer.writerows(out)

                for user in users:
                    f1.write(json.dumps(user._json) + "\n")
            if len(followers) == 5000:
                print(
                    "Followers: More results available. Sleeping for 60 seconds to avoid rate limit"
                )
                time.sleep(60)

    # get friends for a given user
    fjson = "users/{}/friends.jsonl".format(screen_name)
    fcsv = "users/{}/friends.csv".format(screen_name)

    with open(fjson, 'w') as f1, open(fcsv, 'w') as f2:
        for friends in Cursor(client.friends_ids,
                              screen_name=screen_name).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)

                # out = [[user.created_at, user.id, user.screen_name,user.name, user.description, user.location] for user in users]
                # writer = csv.writer(f2)
                # writer.writerow(["id","screen_name"])
                # writer.writerows(out)

                for user in users:
                    f1.write(json.dumps(user._json) + "\n")
            if len(friends) == 5000:
                print(
                    "Friends: More results available. Sleeping for 60 seconds to avoid rate limit"
                )
                time.sleep(60)

    # get user's profile
    fname = "users/{}/user_profile.json".format(screen_name)
    with open(fname, 'w') as f:
        profile = client.get_user(screen_name=screen_name)
        f.write(json.dumps(profile._json, indent=4))
Example #25
	def get_friend_list(self, number_of_friends):
		list_of_friends = []
		for friend in Cursor(self.twitter_client.friends, id=self.twitter_user).items(number_of_friends):
			list_of_friends.append(friend)
		return list_of_friends
Example #26
	def get_user_timeline_tweets(self, number_of_tweets):
		tweets = []
		# Cursor pages through the chosen user's timeline; if no user is given, it
		# defaults to the authenticated account's own timeline. items() decides how
		# many tweets we want.
		for tweet in Cursor(self.twitter_client.user_timeline, id=self.twitter_user).items(number_of_tweets):
			tweets.append(tweet)
		return tweets
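# A small illustrative sketch, assuming `api` is an authenticated tweepy.API instance:
# items() caps how many tweets the cursor yields; with no argument it keeps paging
# until the API has no more results.
first_five = list(Cursor(api.user_timeline, id="twitter").items(5))
print(len(first_five))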
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    # get followers for a given user
    fname = "users/{}/followers.jsonl".format(screen_name)
    with open(fname, 'w') as f:
        for followers in Cursor(client.followers_ids, screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    f.write(json.dumps(user._json)+"\n")
            if len(followers) == 5000:
                print("More results available. Sleeping for 60 seconds to avoid rate limit")
                time.sleep(60)

    # get friends for a given user
    fname = "users/{}/friends.jsonl".format(screen_name)
    with open(fname, 'w') as f:
        for friends in Cursor(client.friends_ids, screen_name=screen_name).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    f.write(json.dumps(user._json)+"\n")
            for file in files:
                done_list.append(file[:-4])
    except FileNotFoundError:
        pass

    done_list.extend(no_tweet_list)

    users=list(set(users)-set(done_list))
    print("City:",city)
    print("No. of users remaining:",len(users))
    if len(users)==0:
        break

    for user_id in users:

        tweets=Cursor(api.user_timeline,user_id=int(user_id)).items()
        last_user_id=user_id
        tweet_count=0

        try:
            print("User id:",user_id)
            for tweet in tweets:
                tweet_count+=1
                print("No. of tweets written:",tweet_count,end='\r')
                try:
                    f=open('tweets/'+city+'/'+user_id+'.txt','a+',encoding='utf-8')
                except FileNotFoundError:
                    os.mkdir('tweets/'+city)
                    f=open('tweets/'+city+'/'+user_id+'.txt','a+',encoding='utf-8')
                f.write('tweet:'+tweet.text+'\n')
                f.close()
        except TweepError:
            # TweepError is assumed to be imported from tweepy in the elided header;
            # skip users whose timeline cannot be read (protected or suspended)
            continue
 def testidcursoritems(self):
     items = list(Cursor(self.api.user_timeline).items(2))
     self.assertEqual(len(items), 2)
Example #30
#Prints the first ten items from timeline to terminal

from tweepy import Cursor
from twitter_client import get_twitter_client

if __name__ == '__main__':
    client = get_twitter_client()
    """status variable is instance of tweepy.Status (model used by
			Tweepy to wrap statuses aka tweets)"""
    for status in Cursor(client.home_timeline).items(10):
        print(status.text)
        print("-----------------")
    def testcursorcursorpages(self):
        pages = list(Cursor(self.api.friends_ids).pages(1))
        self.assertTrue(len(pages) == 1)

        pages = list(Cursor(self.api.followers_ids, username).pages(1))
        self.assertTrue(len(pages) == 1)
    def testcursorcursoritems(self):
        items = list(Cursor(self.api.friends_ids).items(2))
        self.assertEqual(len(items), 2)

        items = list(Cursor(self.api.followers_ids, username).items(1))
        self.assertEqual(len(items), 1)
 def get_user_timeline_tweets(self, startDate=datetime.datetime(1,1,1,0,0), endDate=datetime.datetime(9999,1,1,0,0)):
     tweets = []
     for tweet in Cursor(self.twitter_client.user_timeline, id=self.twitter_user, tweet_mode='extended').items():
         if (not tweet.retweeted) and ('RT' not in tweet.full_text) and (tweet.created_at < endDate) and (tweet.created_at > startDate):
             tweets.append(tweet)
     return tweets
 def get_user_timeline_tweets(self, num_tweets):
     tweets = []
     for tweet in Cursor(self.twitter_client.user_timeline,
                         id=self.twitter_user).items(num_tweets):
         tweets.append(tweet)
     return tweets
Example #35
    def testpagecursoritems(self):
        items = list(Cursor(self.api.user_timeline).items())
        self.assert_(len(items) > 0)

        items = list(Cursor(self.api.user_timeline, 'twitter').items(30))
        self.assert_(len(items) == 30)
Example #36
    data.append(['Joined At  ', user_profile.created_at])
    return data


def GetScreenName(api, user_id):
    user = api.get_user(id=user_id)
    screen_name = user.screen_name
    return screen_name


filename = "C:\\Python27\\tweet2.csv"
f = open(filename, "w+")
headers = "Name,friends,Possitive,Neggative,Neutral"
f.write(headers + "\n")
#list2=[]
for follower_id in Cursor(api.followers_ids,
                          screen_name='shreya73767208').items():

    sc = GetScreenName(api, follower_id)
    data = GetUserProfileDetails(api, sc)
    print(str(data[0]) + ' : ' + str(data[1]) + ' : ' + str(data[2]))
    l1 = str(data[1]).split(',')
    print l1[1].replace('u', '')
    data1 = getTweets(l1[1].replace('u', ''), 40)
    print data1
    l3 = str(data[6]).split(',')
    f.write(l1[1].replace('u', '') + "," + l3[1].replace(']', '') + "," +
            str(data1[0]) + "," + str(data1[1]) + "," + str(data1[2]) + "\n")
f.close()

#print friend list#######################################################
filename = "C:\\Python27\\tweet.csv"
Example #37
    def testpagecursorpages(self):
        pages = list(Cursor(self.api.user_timeline).pages())
        self.assert_(len(pages) > 0)

        pages = list(Cursor(self.api.user_timeline, 'twitter').pages(5))
        self.assert_(len(pages) == 5)
Example #38
 def get_user_timeline_tweets(self, num_tweets): 
     tweets = []         # looping through every tweet provided
     for tweet in Cursor(self.twitter_client.user_timeline, id=self.twitter_user).items(num_tweets):
         tweets.append(tweet)
     return tweets       # list of user timeline tweets
Example #39
    def testcursorcursoritems(self):
        items = list(Cursor(self.api.friends_ids).items())
        self.assert_(len(items) > 0)

        items = list(Cursor(self.api.followers_ids, 'twitter').items(30))
        self.assert_(len(items) == 30)
city = "Delhi"

access_token = ''
access_token_secret = ''
consumer_key = ''
consumer_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
print('authed')
api = tweepy.API(auth)
places = api.geo_search(query=city)
place_id = places[0].id
coords = (places[0].bounding_box.coordinates)[0]
location = coords[0] + coords[2]
#Cursor implementation
tweets = Cursor(api.search, q='place:' + place_id).items()
i = 0
for tweet in tweets:
    with open('tweets/location/' + city + '.txt', 'a+') as f:
        f.write('TWEET_TEXT: ' + tweet.text + '\n')

#Stream implementation
'''class StdOutListener(StreamListener):

    def on_data(self, tweet):
        tweet=json.loads(tweet)

        try:

            #print(tweet['place']['name'],tweet['user']['name'])
Example #41
    def testcursorcursorpages(self):
        pages = list(Cursor(self.api.friends_ids).pages())
        self.assert_(len(pages) > 0)

        pages = list(Cursor(self.api.followers_ids, 'twitter').pages(5))
        self.assert_(len(pages) == 5)
Example #42
def downloadTweets(tweetData):
    # Read authentication keys from .dat file
    print("trying to find twitter authentication keys from: "+ os.getcwd() + "/keys.dat")
    keys = open("keys.dat","r")

    # Authentication and connection to Twitter API.
    consumer_key = keys.readline()[:-1]
    consumer_secret = keys.readline()[:-1]
    access_key = keys.readline()[:-1]
    access_secret = keys.readline()[:-1]

    # Close authentication file
    keys.close()

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # Usernames whose tweets we want to gather.
    users = ["realDonaldTrump","tedcruz","LindseyGrahamSC","SpeakerRyan","BarackObama","GovGaryJohnson","BernieSanders","HillaryClinton","DrJillStein"]

    with open(tweetData, 'wb') as file:
        writer = unicodecsv.writer(file, delimiter = ',', quotechar = '"')
        # Write header row.
        writer.writerow(["politician_name",
                        "politician_username",
                        "tweet_text",
                        "tweet_retweet_count",
                        "tweet_favorite_count",
                        "tweet_hashtags",
                        "tweet_hashtags_count",
                        "tweet_urls",
                        "tweet_urls_count",
                        "tweet_user_mentions",
                        "tweet_user_mentions_count",
                        "tweet_by_trump"])
        
        # For each Twitter username in the users array
        for user in users:
            # Gather info specific to the current user.
            user_obj = api.get_user(user)
            user_info = [user_obj.name,
                         user_obj.screen_name]

            startProgress("Downloading tweets from: " + user)
            # Maximum number of tweets to retrieve
            max_tweets = 1000

            # Count the number of tweets retrieved
            count = 0

            # Get 1000 most recent tweets for the current user.
            for tweet in Cursor(api.user_timeline, screen_name = user).items(max_tweets):

                # Show progress
                progress(count/(max_tweets/100)) 

                # Increase count for tweets
                count += 1

                # Remove all retweets.
                if tweet.text[0:3] == "RT ":
                    continue

                # Get info specific to the current tweet of the current user.
                tweet_info = [unidecode(tweet.text),
                              tweet.retweet_count,
                              tweet.favorite_count]

                # Below entities are stored as variable-length dictionaries, if present.
                hashtags = []
                hashtags_data = tweet.entities.get('hashtags', None)
                if(hashtags_data != None):
                    for i in range(len(hashtags_data)):
                        hashtags.append(unidecode(hashtags_data[i]['text']))

                urls = []
                urls_data = tweet.entities.get('urls', None)
                if(urls_data != None):
                    for i in range(len(urls_data)):
                        urls.append(unidecode(urls_data[i]['url']))

                user_mentions = []
                user_mentions_data = tweet.entities.get('user_mentions', None)
                if(user_mentions_data != None):
                    for i in range(len(user_mentions_data)):
                        user_mentions.append(unidecode(user_mentions_data[i]['screen_name']))
                        
                tweet_by_trump = 0
                if(user_obj.screen_name=='realDonaldTrump'):
                    tweet_by_trump = 1

                more_tweet_info = [', '.join(hashtags),
                                   len(hashtags),
                                   ', '.join(urls),
                                   len(urls),
                                   ', '.join(user_mentions),
                                   len(user_mentions),tweet_by_trump]

                # Write data to CSV.
                writer.writerow(user_info + tweet_info + more_tweet_info)



            endProgress()

            print("Wrote tweets by %s to CSV." % user)
            tweets = item.statuses_count
            account_created_date = item.created_at
            delta = datetime.utcnow() - account_created_date
            account_age_days = delta.days
            # print("Account age (in days): " + str(account_age_days))
            tweets_per_day = 0
            if account_age_days > 0:
                tweets_per_day = "%.2f" % (float(tweets) /
                                           float(account_age_days))

            hashtags = []
            tweets_list = []
            tweet_count = 0
            end_date = datetime.utcnow() - timedelta(days=30)
            for status in Cursor(auth_api.user_timeline, id=target).items():
                tweet_count += 1
                if hasattr(status, "text"):
                    tweets_list.append(status.text)
                if hasattr(status, "entities"):
                    entities = status.entities
                    if "hashtags" in entities:
                        for ent in entities["hashtags"]:
                            if ent is not None:
                                if "text" in ent:
                                    hashtag = ent["text"]
                                    if hashtag is not None:
                                        hashtags.append(hashtag)
                if status.created_at < end_date:
                    break
Example #44
#Source Code for Mining data on Twitter with Python Course by TigerStyle Code Academy 


import sys
import json
from tweepy import Cursor
from twitter_client import get_twitter_client

if __name__ == '__main__':
	user = sys.argv[1]
	client = get_twitter_client()

	fname = "user_timeline_{}.jsonl".format(user)

	with open(fname, 'w') as f:
		for page in Cursor(client.user_timeline, screen_name=user, count=200).pages(16):
			for status in page:
				f.write(json.dumps(status._json)+"\n")
 def testcursorsetstartcursor(self):
     c = Cursor(self.api.friends_ids, cursor=123456)
     self.assertEqual(c.iterator.next_cursor, 123456)
     self.assertFalse('cursor' in c.iterator.kwargs)
import tweepy
from tweepy import Cursor
import time
import json

consumer_key = 'p8rnniy2PVcnQR7I01s71g'
consumer_secret = 'tLaYYeiXzkq1wDmS2gEHTSEArNxk8tSd4D3bQPX6FNM'
access_token = '1196322271-BN4pBpveJuKSfUscrwss7T7KckX0Mgv75vJoVfp'
access_token_secret = 'A4yooP5jkdUfqI1xMi7wzVi9XtCh8uwScrPvOZyXR4nTz'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

rep_cursor = Cursor(api.followers, id="repcurrie")

for page in rep_cursor.pages():
    try:
        print len(page)
        for user in page:
            print user.screen_name
        time.sleep(5)
    except BaseException, e:
        print "failed: ", str(e)
        time.sleep(10)
    
print "done pulling"
Example #47
import json
from tweepy import Cursor
from twitter_client import get_twitter_client

if __name__ == '__main__':
    client = get_twitter_client()

    # retrieve up to 200 statuses per page for 4 pages of recent timeline
    with open('home_timeline.jsonl', 'w') as f:
        for page in Cursor(client.home_timeline, count=200).pages(4):
            for status in page:
                f.write(json.dumps(status._json) + "\n")