Esempio n. 1
0
def run_user_timeline_download():
    """Download user timelines and store them as one JSON document per line.

    User ids are read, one per line, from ``data/top_users_to_PrEP.txt``
    (only the first 1000 lines are considered). For every id, 150 pages of
    20 tweets are requested and each returned tweet is appended to
    ``data/user_timeline_tweets.json``.

    A user whose download fails is reported and skipped; tweets already
    written for that user stay in the output file. At the end the ids that
    completed successfully and the number of ids read are printed.
    """
    print('downloading user-timelines...')
    api = API(auth, parser=JSONParser())
    user_str_ids = []
    with open('data/top_users_to_PrEP.txt') as f_in:
        for line_no, line in enumerate(f_in):
            if line_no == 1000:
                break
            # Drop the trailing newline (and ignore blank lines) so the API
            # is given a clean id instead of e.g. "12345\n".
            user_id = line.strip()
            if user_id:
                user_str_ids.append(user_id)

    users = []
    pages = range(0, 150)
    with open('data/user_timeline_tweets.json', 'w') as f_out:
        for user_id in user_str_ids:
            # Wait 16 minutes before every user -- presumably to stay under
            # the API rate-limit window. Kept outside the ``try`` so that
            # Ctrl-C during the wait actually stops the run.
            time.sleep(60 * 16)
            try:
                for page in pages:
                    for twt in api.user_timeline(user_id, count=20, page=page):
                        f_out.write(json.dumps(twt) + '\n')
            except Exception as exc:
                # Best effort: one failing account must not abort the whole
                # run, but the failure should be visible.
                print('failed to download timeline of %s: %s' % (user_id, exc))
            else:
                users.append(user_id)

    print('done with user-timelines...')
    print(users)
    print(len(user_str_ids))
Esempio n. 2
0
class TimelinesFetcher(object):
    """Poll producers' timelines and import the statuses that pass the filter."""

    def __init__(self):
        auth = get_tweepy_oauth_handler()
        # This could be optimized once we have more than one credentials set in
        # the DB (we could loop over them).
        self.api = TwitterAPI(auth_handler=auth,
                              wait_on_rate_limit=True,
                              wait_on_rate_limit_notify=True)

    def start(self):
        """Fetch every producer's timeline, in random order, forever."""
        while True:
            for producer in Producer.select().order_by(fn.Random()):
                self.fetch_producer(producer)

    def fetch_producer(self, producer):
        """Import the statuses of ``producer`` newer than the last one seen.

        Protected producers are deleted (recursively) instead of fetched.
        After a non-empty fetch, ``producer.last_status_id`` is advanced to
        the newest status id received and the producer is saved.

        API errors are not caught here and propagate to the caller.
        """
        if producer.protected:
            logger.debug("Removing producer '%s' because they're protected",
                         producer.screen_name)
            producer.delete_instance(recursive=True)
            return

        # 200 is the page limit
        kwargs = {"count": STATUSES_PER_PRODUCER, "user_id": producer.id_str}

        if producer.last_status_id != 0:
            kwargs["since_id"] = producer.last_status_id

        # NOTE: a TweepError handler used to wrap this call but was disabled;
        # a failing request therefore terminates start().
        timeline = list(self.api.user_timeline(**kwargs))

        logger.debug("Importing %d statuses from @%s",
                     len(timeline), producer.screen_name)

        if not timeline:
            return

        for st in timeline:
            self.on_status(producer, st)

        # Timelines come back newest-first, so the last element is the OLDEST
        # status; using it as since_id would re-fetch the same statuses on the
        # next pass. Take the highest id instead (independent of ordering).
        producer.last_status_id = max(st.id for st in timeline)
        producer.save()

    def on_status(self, producer, status):
        """Import ``status`` for ``producer`` if it passes ``filter_status``.

        Returns ``True`` when the status was imported, ``None`` otherwise.
        """
        if filter_status(status):
            import_status(status, author=producer)
            return True
Esempio n. 3
0
class TweepyAPITests(unittest.TestCase):
    # Exercises the API wrapper methods through an authenticated client.
    # Most tests only verify that the call completes without raising; a few
    # additionally assert on the returned objects. Tests that modify account
    # state (profile, friendships, blocks, ...) restore it afterwards.
    #
    # Comments are used instead of docstrings on purpose: unittest shows the
    # first docstring line in place of the test name in verbose output.

    def setUp(self):
        auth = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)
        auth.set_access_token(oauth_token, oauth_token_secret)
        self.api = API(auth)
        self.api.retry_count = 2
        self.api.retry_delay = 5

    def testhometimeline(self):
        self.api.home_timeline()

    def testfriendstimeline(self):
        self.api.friends_timeline()

    def testusertimeline(self):
        self.api.user_timeline()
        self.api.user_timeline('twitter')

    def testmentions(self):
        self.api.mentions()

    def testretweetedbyme(self):
        self.api.retweeted_by_me()

    def testretweetedbyuser(self):
        self.api.retweeted_by_user('twitter')

    def testretweetedtome(self):
        self.api.retweeted_to_me()

    def testretweetsofme(self):
        self.api.retweets_of_me()

    def testretweet(self):
        s = self.api.retweet(123)
        s.destroy()

    def testretweets(self):
        self.api.retweets(123)

    def testgetstatus(self):
        self.api.get_status(id=123)

    def testupdateanddestroystatus(self):
        # test update
        text = 'testing %i' % random.randint(0, 1000)
        update = self.api.update_status(status=text)
        self.assertEqual(update.text, text)

        # test destroy
        deleted = self.api.destroy_status(id=update.id)
        self.assertEqual(deleted.id, update.id)

    def testgetuser(self):
        u = self.api.get_user('twitter')
        self.assertEqual(u.screen_name, 'twitter')

        u = self.api.get_user(783214)
        self.assertEqual(u.screen_name, 'twitter')

    def testsearchusers(self):
        self.api.search_users('twitter')

    def testme(self):
        me = self.api.me()
        self.assertEqual(me.screen_name, username)

    def testfriends(self):
        self.api.friends()

    def testfollowers(self):
        self.api.followers()

    def testdirectmessages(self):
        self.api.direct_messages()

    def testsentdirectmessages(self):
        self.api.sent_direct_messages()

    def testsendanddestroydirectmessage(self):
        # send
        sent_dm = self.api.send_direct_message(username, text='test message')
        self.assertEqual(sent_dm.text, 'test message')
        self.assertEqual(sent_dm.sender.screen_name, username)
        self.assertEqual(sent_dm.recipient.screen_name, username)

        # destroy
        destroyed_dm = self.api.destroy_direct_message(sent_dm.id)
        self.assertEqual(destroyed_dm.text, sent_dm.text)
        self.assertEqual(destroyed_dm.id, sent_dm.id)
        self.assertEqual(destroyed_dm.sender.screen_name, username)
        self.assertEqual(destroyed_dm.recipient.screen_name, username)

    def testcreatedestroyfriendship(self):
        enemy = self.api.destroy_friendship('twitter')
        self.assertEqual(enemy.screen_name, 'twitter')
        self.assertFalse(self.api.exists_friendship(username, 'twitter'))

        friend = self.api.create_friendship('twitter')
        self.assertEqual(friend.screen_name, 'twitter')
        self.assertTrue(self.api.exists_friendship(username, 'twitter'))

    def testshowfriendship(self):
        # 'twitter' (not the former typo 'twtiter') to match every other test.
        source, target = self.api.show_friendship(target_screen_name='twitter')
        # assertIsInstance replaces the deprecated ``assert_`` alias.
        self.assertIsInstance(source, Friendship)
        self.assertIsInstance(target, Friendship)

    def testfriendsids(self):
        self.api.friends_ids(username)

    def testfollowersids(self):
        self.api.followers_ids(username)

    def testverifycredentials(self):
        self.assertNotEqual(self.api.verify_credentials(), False)

        # make sure that `me.status.entities` is not an empty dict
        me = self.api.verify_credentials(include_entities=True)
        self.assertTrue(me.status.entities)

        # `status` shouldn't be included
        me = self.api.verify_credentials(skip_status=True)
        self.assertFalse(hasattr(me, 'status'))

    def testratelimitstatus(self):
        self.api.rate_limit_status()

    def testupdateprofilecolors(self):
        original = self.api.me()
        updated = self.api.update_profile_colors(
            '000', '000', '000', '000', '000')

        # restore colors (before asserting, so a failure still leaves the
        # account in its original state)
        self.api.update_profile_colors(
            original.profile_background_color,
            original.profile_text_color,
            original.profile_link_color,
            original.profile_sidebar_fill_color,
            original.profile_sidebar_border_color
        )

        self.assertEqual(updated.profile_background_color, '000')
        self.assertEqual(updated.profile_text_color, '000')
        self.assertEqual(updated.profile_link_color, '000')
        self.assertEqual(updated.profile_sidebar_fill_color, '000')
        self.assertEqual(updated.profile_sidebar_border_color, '000')

    # Disabled tests (previously kept inside a stray string literal):
    #
    # def testupateprofileimage(self):
    #     self.api.update_profile_image('examples/profile.png')
    #
    # def testupdateprofilebg(self):
    #     self.api.update_profile_background_image('examples/bg.png')

    def testupdateprofile(self):
        original = self.api.me()
        profile = {
            'name': 'Tweepy test 123',
            'url': 'http://www.example.com',
            'location': 'pytopia',
            'description': 'just testing things out'
        }
        updated = self.api.update_profile(**profile)
        # restore the original profile before asserting
        self.api.update_profile(
            name=original.name, url=original.url,
            location=original.location, description=original.description
        )

        for k, v in profile.items():
            self.assertEqual(getattr(updated, k), v)

    def testfavorites(self):
        self.api.favorites()

    def testcreatedestroyfavorite(self):
        self.api.create_favorite(4901062372)
        self.api.destroy_favorite(4901062372)

    def testenabledisablenotifications(self):
        self.api.enable_notifications('twitter')
        self.api.disable_notifications('twitter')

    def testcreatedestroyblock(self):
        self.api.create_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), True)
        self.api.destroy_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), False)
        self.api.create_friendship('twitter')  # restore

    def testblocks(self):
        self.api.blocks()

    def testblocksids(self):
        self.api.blocks_ids()

    def testcreateupdatedestroylist(self):
        self.api.create_list('tweeps')
        # XXX: right now twitter throws a 500 here,
        # issue is being looked into by twitter.
        # self.api.update_list('tweeps', mode='private')
        self.api.destroy_list('tweeps')

    def testlists(self):
        self.api.lists()

    def testlistsmemberships(self):
        self.api.lists_memberships()

    def testlistssubscriptions(self):
        self.api.lists_subscriptions()

    def testlisttimeline(self):
        self.api.list_timeline('applepie', 'stars')

    def testgetlist(self):
        self.api.get_list('applepie', 'stars')

    def testlistmembers(self):
        self.api.list_members('applepie', 'stars')

    def testislistmember(self):
        uid = self.api.get_user('applepie').id
        self.api.is_list_member('applepie', 'stars', uid)

    def testsubscribeunsubscribelist(self):
        self.api.subscribe_list('applepie', 'stars')
        self.api.unsubscribe_list('applepie', 'stars')

    def testlistsubscribers(self):
        self.api.list_subscribers('applepie', 'stars')

    def testissubscribedlist(self):
        uid = self.api.get_user('applepie').id
        self.api.is_subscribed_list('applepie', 'stars', uid)

    def testsavedsearches(self):
        s = self.api.create_saved_search('test')
        try:
            self.api.saved_searches()
            self.assertEqual(self.api.get_saved_search(s.id).query, 'test')
        finally:
            # Always clean up, so a failed assertion does not leave the saved
            # search behind for the next run.
            self.api.destroy_saved_search(s.id)

    def testsearch(self):
        self.api.search('tweepy')

    def testtrends(self):
        self.api.trends_daily()
        self.api.trends_weekly()

    def testgeoapis(self):
        self.api.geo_id(id='c3f37afa9efcf94b')  # Austin, TX, USA
        self.api.nearby_places(lat=30.267370168467806,
                               long=-97.74261474609375)  # Austin, TX, USA
        self.api.reverse_geocode(lat=30.267370168467806,
                                 long=-97.74261474609375)  # Austin, TX, USA
class GetTwitterData():
    """Fetch tweets and follower lists through the API and dump them to CSV.

    NOTE: the file handling targets Python 2 (binary-mode csv files and
    utf-8 encoded text cells). The syntax is valid on Python 3 as well, but
    there the csv files would have to be opened in text mode with
    ``newline=''`` and the ``.encode`` call dropped.
    """

    def __init__(self, auth):
        self.auth = auth
        self.api = API(self.auth)

    def _write_tweets_csv(self, screen_name, tweets):
        """Overwrite ``users_tweets_path`` with a header plus one row per tweet."""
        rows = [[screen_name, tweet.id_str, tweet.created_at,
                 tweet.text.encode("utf-8")]
                for tweet in tweets]
        with open(users_tweets_path, 'wb') as f:
            writer = csv.writer(f)
            writer.writerow(["screen_name", "id", "created_at", "text"])
            writer.writerows(rows)

    def get_all_tweets(self, screen_name, tweet_count):
        """Page backwards through a user's timeline and write it to CSV.

        ``tweet_count`` is the page size passed as ``count`` on every request.
        Requests continue, using ``max_id`` to avoid duplicates, until a
        request returns no tweets. A user without tweets yields a CSV that
        contains only the header row.
        """
        alltweets = []

        # initial request for the most recent tweets
        new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count)
        alltweets.extend(new_tweets)

        # keep grabbing tweets until there are no tweets left to grab
        while len(new_tweets) > 0:
            # id of the oldest tweet seen so far, less one; computed only
            # while tweets exist so an empty timeline cannot raise IndexError
            oldest = alltweets[-1].id - 1
            print("getting tweets before %s" % (oldest))

            # all subsequent requests use max_id to prevent duplicates
            new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count, max_id=oldest)
            alltweets.extend(new_tweets)

            print("...%s tweets downloaded so far" % (len(alltweets)))

        self._write_tweets_csv(screen_name, alltweets)

    def new_get_all_tweets(self, screen_name, tweet_count):
        """Write only the most recent ``tweet_count`` tweets of a user to CSV.

        NOTE(review): the output file is overwritten on every call, so when
        this is called for several users only the last one's tweets remain.
        """
        new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count)
        self._write_tweets_csv(screen_name, new_tweets)

    def find_friends(self, screen_name):
        """Append the followers of ``screen_name`` to ``users_friends_path``.

        Follower ids are fetched page by page (50 per page) and resolved to
        user objects; one row ``[screen_name, follower id, follower
        screen_name]`` is appended per resolved user. The header row is
        written only when the file is new or empty.
        """
        import os  # local import: used only for the header check below

        print("screen_name: " + screen_name)

        for page_ids in Cursor(self.api.followers_ids, screen_name=screen_name, count=50).pages():
            print(page_ids)
            print("ids are: " + str(len(page_ids)))
            if not page_ids:
                # nothing to look up on an empty page
                continue

            users = list(self.api.lookup_users(user_ids=page_ids))
            friends_list = [user.screen_name for user in users]
            print("list of users\n")
            print(friends_list)
            # Take the id from the returned user object instead of pairing by
            # position: the lookup may omit users (suspended/deleted) or
            # return them in a different order than requested.
            friends_list_output = [[screen_name, user.id, user.screen_name]
                                   for user in users]
            print(friends_list_output)

            need_header = (not os.path.exists(users_friends_path)
                           or os.path.getsize(users_friends_path) == 0)
            with open(users_friends_path, 'ab') as f:
                writer = csv.writer(f)
                if need_header:
                    writer.writerow(["screen_name", "id", "friends"])
                writer.writerows(friends_list_output)

            time.sleep(1)

    def readfile(self):
        """Process every tweet stored (one JSON document per line) in ``tweets_data_path``.

        For each tweet's author the latest 5 tweets and the follower list are
        downloaded. Lines that are not valid JSON and records without a user
        screen name are skipped; download errors are printed and do not stop
        the run.
        """
        tweets_data = []
        with open(tweets_data_path, "r") as tweets_file:
            for line in tweets_file:
                try:
                    tweets_data.append(json.loads(line))
                except ValueError:
                    # not a JSON document (e.g. a blank line)
                    continue

        print(len(tweets_data))
        for counter, tweet in enumerate(tweets_data, 1):
            user = tweet.get('user') if isinstance(tweet, dict) else None
            screen_name = user.get('screen_name') if isinstance(user, dict) else None
            if screen_name is None:
                print("skipping record %d: no user screen_name" % counter)
                continue

            try:
                self.new_get_all_tweets(screen_name, 5)
            except Exception as e:
                print("error:\n")
                print(str(e))

            try:
                print(screen_name)
                self.find_friends(screen_name)
            except Exception as e:
                print("fail:\n")
                print(str(e))
            print(counter)