Beispiel #1
0
def run_user_timeline_download():
    """Download the timelines of the listed users into a JSON-lines file.

    Reads user ids from ``data/top_users_to_PrEP.txt`` (only the first 1000
    lines are considered), then for every id requests timeline pages 0-149
    with 20 tweets per page and appends each tweet as one JSON document per
    line to ``data/user_timeline_tweets.json``.

    A failure for one user is reported and the run continues with the next
    user. At the end the ids that completed without error and the number of
    ids read are printed.
    """
    print('downloading user-timelines...')
    api = API(auth, parser=JSONParser())
    user_str_ids = []
    with open('data/top_users_to_PrEP.txt') as f_in:
        for line_no, line in enumerate(f_in):
            if line_no == 1000:
                break
            # Drop the trailing newline (and skip blank lines) so the id is
            # sent to the API exactly as written in the file.
            user_id = line.strip()
            if user_id:
                user_str_ids.append(user_id)

    users = []
    pages = range(0, 150)
    with open('data/user_timeline_tweets.json', 'w') as f_out:
        for user_id in user_str_ids:
            try:
                # Long pause before each user, kept from the original code
                # (presumably to stay under the API rate limit).
                time.sleep(60 * 16)
                for page in pages:
                    for twt in api.user_timeline(user_id, count=20, page=page):
                        f_out.write(json.dumps(twt) + '\n')
                users.append(user_id)
            except Exception as exc:
                # Best effort: keep going, but say which user failed and why
                # instead of silently discarding the error.
                print('failed to download timeline for %s: %s' % (user_id, exc))

    print('done with user-timelines...')
    print(users)
    print(len(user_str_ids))
Beispiel #2
0
 def _get_rate_limit_status(self, key, secret):
     """
     Return the rate limit status reported for the given access token.

     A fresh OAuth handler is built from this object's consumer
     credentials and the supplied token key/secret for the query.
     """
     handler = OAuthHandler(self.consumer_key, self.consumer_secret)
     handler.set_access_token(key, secret)
     return API(handler).rate_limit_status()
Beispiel #3
0
 def get_username(self):
     """Return the screen name for this handler, fetching it once if needed.

     When ``self.username`` is already set it is returned as is. Otherwise
     the credentials are verified through the API and the resulting screen
     name is stored on ``self.username`` before being returned.

     Raises:
         TweepError: if credential verification yields no user.
     """
     if self.username is not None:
         return self.username

     verified = API(self).verify_credentials()
     if not verified:
         raise TweepError("Unable to get username, invalid oauth token!")
     self.username = verified.screen_name
     return self.username
def tweet(answer):
    """Post *answer* as a status update.

    The four OAuth values are read from the ``auth`` section of the
    module-level ``config`` object.
    """
    consumer_key, consumer_secret, access_token, access_secret = [
        config.get("auth", option)
        for option in ("CONSUMER_KEY", "CONSUMER_SECRET",
                       "ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")
    ]

    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)
    API(handler).update_status(status=answer)
Beispiel #5
0
 def get_user_id(self):
     """Return the user id for this handler, fetching it once if needed.

     When ``self.user_id`` is already set it is returned as is. Otherwise
     the credentials are verified through the API, and both
     ``self.username`` and ``self.user_id`` are filled in from the result.

     Raises:
         TweepError: if credential verification yields no user.
     """
     if self.user_id is not None:
         return self.user_id

     verified = API(self).verify_credentials()
     if not verified:
         raise TweepError('Unable to get user_id,'
                          ' invalid oauth token!')
     self.username = verified.screen_name
     self.user_id = verified.id
     return self.user_id
Beispiel #6
0
    def _testoauth(self):
        auth = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)

        # test getting access token
        auth_url = auth.get_authorization_url()
        print 'Please authorize: ' + auth_url
        verifier = raw_input('PIN: ').strip()
        self.assert_(len(verifier) > 0)
        access_token = auth.get_access_token(verifier)
        self.assert_(access_token is not None)

        # build api object test using oauth
        api = API(auth)
        s = api.update_status('test %i' % random.randint(0, 1000))
        api.destroy_status(s.id)
Beispiel #7
0
def update_twitter_profile(user):
    """Copy a user's Twitter account details onto the local profile.

    Looks up the Twitter account referenced by the user's profile and, when
    the lookup succeeds, copies the name, URL, avatar, description, screen
    name and location onto the local user and profile, saving both.

    The update is best effort: if the profile or the Twitter lookup fails,
    the failure is logged and nothing is changed.

    Args:
        user: Object exposing ``get_profile()``; the returned profile is
            expected to carry ``twitter_profile.twitter_id`` and ``user``.
    """
    import logging

    a = API()
    try:
        profile = user.get_profile()
        twitter_user = a.get_user(user_id=profile.twitter_profile.twitter_id)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate.
        logging.getLogger(__name__).exception(
            "Could not fetch Twitter profile; leaving local profile untouched")
        return

    if not twitter_user:
        return

    # The first word is the first name, everything after the first space is
    # the last name.
    first_name, _, last_name = twitter_user.name.partition(" ")
    profile.user.first_name = first_name
    profile.user.last_name = last_name
    profile.user.save()

    profile.website = twitter_user.url
    profile.profile_image_url = twitter_user.profile_image_url
    profile.description = twitter_user.description
    profile.twitter_name = twitter_user.screen_name
    profile.location = twitter_user.location
    profile.save()
def scrapeThread(index):
    auth = OAuthHandler(consumerKeys[index], consumerSecrets[index])
    auth.set_access_token(accessTokens[index], accessSecrets[index])
    api = API(auth)
  
    try:
        api.verify_credentials()
    except TweepError:
        print "Failed to authenticate - most likely reached rate limit/incorrect credentials!"
        return
    else:
        print "You have successfully logged on as: " + api.me().screen_name
  
    for i in range(0, numDays):
        for query in queries[index]:
            count = 0
            cursor = Cursor(api.search,
                            q=quote(query.encode('utf-8')),
                            lang=langs[index],
                            since=sinces[i],
                            until=untils[i],
                            include_entities=True).items()
            while True:
                try:
                    tweet = cursor.next()
                    utc = datetime.now().strftime('%Y%m%dT%H%M%S%f')
                    outPath = path.join(outDir, sinces[i], langs[index], utc + '.json')
                    with open(outPath, 'w') as output:
                        output.write(dumps(tweet._json, ensure_ascii=False).encode('utf8'))
                    count += 1
                    if count == int(perDay / len(queries[index])):
                        break
                except TweepError:
                    print langs[index] + " - rate limit reached! Pausing thread for 15 minutes."
                    sleep(60 * 15)
                    continue
                except StopIteration:
                    break
            print str(count) + " tweets stored in " + outPath
Beispiel #9
0
class Tweet():
	"""Small wrapper around a tweepy ``API`` client for posting statuses."""

	def __init__(self, auth):
		"""Keep the auth handler and build an API client from it."""
		self.auth = auth
		self.api = API(auth)

	def tweet_with_media(self, fn, status):
		"""Post *status* with the media file *fn* attached."""
		self.api.update_with_media(fn, status=status)

	def update_with_media(self, fn, status, tweet_id):
		"""Reply to tweet *tweet_id* with *status* and the media file *fn*.

		The file is uploaded first and attached to the reply by media id.
		"""
		media = self.api.media_upload(fn)
		# The reply target must be sent as ``in_reply_to_status_id``.
		self.api.update_status(status=status, in_reply_to_status_id=tweet_id, media_ids=[media.media_id])

	def update(self, status, tweet_id):
		"""Reply to tweet *tweet_id* with the text *status*."""
		# The reply target must be sent as ``in_reply_to_status_id``.
		self.api.update_status(status=status, in_reply_to_status_id=tweet_id)
Beispiel #10
0
 def __init__(self, api=None):
     """Create the Twitter API client and the Google Maps client.

     The ``api`` argument is accepted but not used; a new client is
     always built with the retry, timeout and rate-limit options below.
     """
     api_options = {
         'retry_count': 10,
         'retry_delay': 30,
         'timeout': 1000,
         'wait_on_rate_limit': True,
     }
     self.api = API(**api_options)
     maps_key = PARAMS['googlemaps_apikey']
     self.gmaps = googlemaps.Client(key=maps_key)
 def __init__(self, api=None):
     """Create the API client; the ``api`` argument is accepted but not used."""
     client = API(retry_count=100, timeout=1000)
     self.api = client
Beispiel #12
0
 def __init__(self, twitter_id=None, twitter_sn=None, api=None):
     """Store the API client and the Twitter id / screen name.

     A default ``API()`` client is created when no truthy ``api`` is given.
     """
     self.api = api if api else API()
     self.twitter_id, self.twitter_sn = twitter_id, twitter_sn
Beispiel #13
0
 def setUp(self):
     """Create the auth handler and an API client with 2 retries, 5 apart."""
     self.auth = create_auth()
     client = self.api = API(self.auth)
     client.retry_count = 2
     client.retry_delay = 5
 def __init__(self, dataD, api=None):
     """Store ``dataD`` and the API client (a default ``API()`` if none is given)."""
     self.api = api if api else API()
     self.dataD = dataD
Beispiel #15
0
def update_tweet(text):
    """Post *text* as a status update using the handler from ``get_oauth()``."""
    client = API(get_oauth())
    client.update_status(status=text)
    
    
# Module-level work lists; both are reset at the start of every pass of the
# main loop below. Each entry is a dict describing one user node.
forward = []
backward = []
# Script entry point (Python 2). Takes two user names from sys.argv[1] and
# sys.argv[2], seeds `forward` with the first and `backward` with the second,
# then loops calling go_backward(). NOTE: the snippet is cut off below.
if __name__ == "__main__":
    while 1:
        try:
    	    forward = []
    	    backward = []
            #outfile = sys.argv[1]
            #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') 
            # SECURITY: consumer key/secret and access token are hard-coded
            # here; they should be revoked and loaded from configuration.
            auth = tweepy.OAuthHandler("xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM")
            auth.set_access_token("174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")
            twitterApi = API(auth_handler=auth,
                    host='api.twitter.com', search_host='search.twitter.com',
                     cache=FileCache("cache", timeout = -1), secure=False, api_root='/1', search_root='',
                    retry_count=0, retry_delay=0, retry_errors=None,
                    parser=None)
                        
            #username1, username2,listUsernames = readFile(outfile)
            user1 = twitterApi.get_user(sys.argv[1]) #@UndefinedVariable
            user2 = twitterApi.get_user(sys.argv[2]) #@UndefinedVariable
            
            forward.append({"obj":user1, "cursor":-1, "friends":[], "cursor_obj":-1, "path":[]})
            backward.append({"obj":user2, "cursor":-1, "cursor_obj":-1,"path":[], "followers":[] })
            # Counts go_backward() calls; set to -2 once a path is found.
            reqs = 0
            while 1:
        	fin, path = go_backward()
		reqs +=1;print reqs
		if fin: print path;reqs=-2;break
        	while has_node(backward):
Beispiel #17
0
 def setUp(self):
     """Build an OAuth-backed API client with 2 retries, 5 apart."""
     handler = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)
     handler.set_access_token(oauth_token, oauth_token_secret)
     client = self.api = API(handler)
     client.retry_count = 2
     client.retry_delay = 5
Beispiel #18
0
class TweepyAPITests(unittest.TestCase):
    # Live tests for the tweepy API client. Every test talks to the API
    # through ``self.api``, which setUp builds from the module-level OAuth
    # settings. Most tests only check that the call completes.

    def setUp(self):
        # OAuth-authenticated client with a small retry budget.
        auth = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)
        auth.set_access_token(oauth_token, oauth_token_secret)
        self.api = API(auth)
        self.api.retry_count = 2
        self.api.retry_delay = 5

    # --- timelines and statuses ---

    def testhometimeline(self):
        self.api.home_timeline()

    def testfriendstimeline(self):
        self.api.friends_timeline()

    def testusertimeline(self):
        self.api.user_timeline()
        self.api.user_timeline('twitter')

    def testmentions(self):
        self.api.mentions()

    def testretweetedbyme(self):
        self.api.retweeted_by_me()

    def testretweetedbyuser(self):
        self.api.retweeted_by_user('twitter')

    def testretweetedtome(self):
        self.api.retweeted_to_me()

    def testretweetsofme(self):
        self.api.retweets_of_me()

    def testretweet(self):
        s = self.api.retweet(123)
        s.destroy()

    def testretweets(self):
        self.api.retweets(123)

    def testgetstatus(self):
        self.api.get_status(id=123)

    def testupdateanddestroystatus(self):
        # test update
        text = 'testing %i' % random.randint(0, 1000)
        update = self.api.update_status(status=text)
        self.assertEqual(update.text, text)

        # test destroy
        deleted = self.api.destroy_status(id=update.id)
        self.assertEqual(deleted.id, update.id)

    # --- users ---

    def testgetuser(self):
        u = self.api.get_user('twitter')
        self.assertEqual(u.screen_name, 'twitter')

        u = self.api.get_user(783214)
        self.assertEqual(u.screen_name, 'twitter')

    def testsearchusers(self):
        self.api.search_users('twitter')

    def testme(self):
        me = self.api.me()
        self.assertEqual(me.screen_name, username)

    def testfriends(self):
        self.api.friends()

    def testfollowers(self):
        self.api.followers()

    # --- direct messages ---

    def testdirectmessages(self):
        self.api.direct_messages()

    def testsentdirectmessages(self):
        self.api.sent_direct_messages()

    def testsendanddestroydirectmessage(self):
        # send (the test account messages itself)
        sent_dm = self.api.send_direct_message(username, text='test message')
        self.assertEqual(sent_dm.text, 'test message')
        self.assertEqual(sent_dm.sender.screen_name, username)
        self.assertEqual(sent_dm.recipient.screen_name, username)

        # destroy
        destroyed_dm = self.api.destroy_direct_message(sent_dm.id)
        self.assertEqual(destroyed_dm.text, sent_dm.text)
        self.assertEqual(destroyed_dm.id, sent_dm.id)
        self.assertEqual(destroyed_dm.sender.screen_name, username)
        self.assertEqual(destroyed_dm.recipient.screen_name, username)

    # --- friendships ---

    def testcreatedestroyfriendship(self):
        enemy = self.api.destroy_friendship('twitter')
        self.assertEqual(enemy.screen_name, 'twitter')
        self.assertFalse(self.api.exists_friendship(username, 'twitter'))

        friend = self.api.create_friendship('twitter')
        self.assertEqual(friend.screen_name, 'twitter')
        self.assertTrue(self.api.exists_friendship(username, 'twitter'))

    def testshowfriendship(self):
        # Same 'twitter' account as the other friendship tests.
        source, target = self.api.show_friendship(target_screen_name='twitter')
        self.assertIsInstance(source, Friendship)
        self.assertIsInstance(target, Friendship)

    def testfriendsids(self):
        self.api.friends_ids(username)

    def testfollowersids(self):
        self.api.followers_ids(username)

    # --- account ---

    def testverifycredentials(self):
        self.assertNotEqual(self.api.verify_credentials(), False)

        # make sure that `me.status.entities` is not an empty dict
        me = self.api.verify_credentials(include_entities=True)
        self.assertTrue(me.status.entities)

        # `status` shouldn't be included
        me = self.api.verify_credentials(skip_status=True)
        self.assertFalse(hasattr(me, 'status'))

    def testratelimitstatus(self):
        self.api.rate_limit_status()

    def testupdateprofilecolors(self):
        original = self.api.me()
        updated = self.api.update_profile_colors(
            '000', '000', '000', '000', '000')

        # restore colors before asserting, so a failed assertion does not
        # leave the account modified
        self.api.update_profile_colors(
            original.profile_background_color,
            original.profile_text_color,
            original.profile_link_color,
            original.profile_sidebar_fill_color,
            original.profile_sidebar_border_color
        )

        self.assertEqual(updated.profile_background_color, '000')
        self.assertEqual(updated.profile_text_color, '000')
        self.assertEqual(updated.profile_link_color, '000')
        self.assertEqual(updated.profile_sidebar_fill_color, '000')
        self.assertEqual(updated.profile_sidebar_border_color, '000')

    # Disabled tests, kept for reference:
    #
    # def testupateprofileimage(self):
    #     self.api.update_profile_image('examples/profile.png')
    #
    # def testupdateprofilebg(self):
    #     self.api.update_profile_background_image('examples/bg.png')

    def testupdateprofile(self):
        original = self.api.me()
        profile = {
            'name': 'Tweepy test 123',
            'url': 'http://www.example.com',
            'location': 'pytopia',
            'description': 'just testing things out'
        }
        updated = self.api.update_profile(**profile)
        # restore the original profile before asserting
        self.api.update_profile(
            name=original.name, url=original.url,
            location=original.location, description=original.description
        )

        for k, v in profile.items():
            self.assertEqual(getattr(updated, k), v)

    # --- favorites, notifications, blocks ---

    def testfavorites(self):
        self.api.favorites()

    def testcreatedestroyfavorite(self):
        self.api.create_favorite(4901062372)
        self.api.destroy_favorite(4901062372)

    def testenabledisablenotifications(self):
        self.api.enable_notifications('twitter')
        self.api.disable_notifications('twitter')

    def testcreatedestroyblock(self):
        self.api.create_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), True)
        self.api.destroy_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), False)
        self.api.create_friendship('twitter')  # restore

    def testblocks(self):
        self.api.blocks()

    def testblocksids(self):
        self.api.blocks_ids()

    # --- lists ---

    def testcreateupdatedestroylist(self):
        self.api.create_list('tweeps')
        # XXX: right now twitter throws a 500 here,
        # issue is being looked into by twitter.
        # self.api.update_list('tweeps', mode='private')
        self.api.destroy_list('tweeps')

    def testlists(self):
        self.api.lists()

    def testlistsmemberships(self):
        self.api.lists_memberships()

    def testlistssubscriptions(self):
        self.api.lists_subscriptions()

    def testlisttimeline(self):
        self.api.list_timeline('applepie', 'stars')

    def testgetlist(self):
        self.api.get_list('applepie', 'stars')

    def testlistmembers(self):
        self.api.list_members('applepie', 'stars')

    def testislistmember(self):
        uid = self.api.get_user('applepie').id
        self.api.is_list_member('applepie', 'stars', uid)

    def testsubscribeunsubscribelist(self):
        self.api.subscribe_list('applepie', 'stars')
        self.api.unsubscribe_list('applepie', 'stars')

    def testlistsubscribers(self):
        self.api.list_subscribers('applepie', 'stars')

    def testissubscribedlist(self):
        uid = self.api.get_user('applepie').id
        self.api.is_subscribed_list('applepie', 'stars', uid)

    # --- search, trends, geo ---

    def testsavedsearches(self):
        s = self.api.create_saved_search('test')
        self.api.saved_searches()
        self.assertEqual(self.api.get_saved_search(s.id).query, 'test')
        self.api.destroy_saved_search(s.id)

    def testsearch(self):
        self.api.search('tweepy')

    def testtrends(self):
        self.api.trends_daily()
        self.api.trends_weekly()

    def testgeoapis(self):
        self.api.geo_id(id='c3f37afa9efcf94b')  # Austin, TX, USA
        self.api.nearby_places(lat=30.267370168467806,
                               long=-97.74261474609375)  # Austin, TX, USA
        self.api.reverse_geocode(lat=30.267370168467806,
                                 long=-97.74261474609375)  # Austin, TX, USA
def forecast(city):
    """Tweet the current conditions and today's forecast for *city*.

    Reads the WOEID and the Twitter credentials from the *city* section of
    ``settings.cfg``, fetches the Yahoo weather feed for that WOEID, builds
    a short "is it raining?" message from the current condition code and
    posts it as a status update.

    Args:
        city: Name of the ``settings.cfg`` section to use; also passed to
            ``taskqueue.Queue`` as the queue name.
    """
    config = ConfigParser.RawConfigParser()
    config.read('settings.cfg')
    WOEID = config.get(city, 'WOEID')
    baseurl = "https://query.yahooapis.com/v1/public/yql?"
    yql_query = "select * from weather.forecast where woeid=" + WOEID
    yql_url = baseurl + urllib.urlencode({'q': yql_query}) + "&format=xml"

    forecastfile = urllib.urlopen(yql_url)
    try:
        tree = ET.parse(forecastfile)
    finally:
        # Release the connection even when the response cannot be parsed.
        forecastfile.close()

    # Positional walk through the response document; the indexes are taken
    # unchanged from the original code and depend on the feed layout.
    query = tree.getroot()
    root = query[0]
    channel = root[0]
    item = channel[12]
    description = item[5]
    forecast_el = item[7]  # named so it does not shadow this function
    high = forecast_el.attrib['high']
    low = forecast_el.attrib['low']
    forecastText = forecast_el.attrib['text']
    currentTemp = description.attrib['temp']
    currentText = description.attrib['text']
    currentCondition = int(description.attrib['code'])
    timeStamp = description.attrib['date']

    rainCodes = [
        1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 18, 35, 37, 38, 39, 40, 47, 45, 46
    ]
    fairCodes = [31, 32, 33, 34]
    overcastCodes = [26, 27, 28]
    snowCodes = [13, 14, 15, 16, 41, 42, 43]
    uniqueCodes = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 17, 18, 19, 20, 21, 22, 23, 24, 35, 37,
        38, 39, 40, 44, 45, 46, 47
    ]
    blankCodes = [9, 11, 12, 25, 29, 30, 36, 3200]

    # Yes/no part of the answer.
    if currentCondition in rainCodes:
        yes_choices = [
            'Yes', 'Yes', 'Yea', 'Yep', 'Ya', 'Grab an umbrella',
            "It's raining"
        ]
        a = random.choice(yes_choices)
    else:
        no_choices = ['No', 'Nah', 'Nope', 'Not raining', 'Not raining']
        a = random.choice(no_choices)

    # Optional remark appended to the answer. The five code groups do not
    # overlap, so at most one branch applies.
    if currentCondition in fairCodes:
        comment = random.choice([
            ", beautiful day", ", clear day", ", nice day", ", fair weather",
            ""
        ])
    elif currentCondition in overcastCodes:
        comment = random.choice([
            ", gloomy", ", cloudy", ", overcast", ", grey skies", ""
        ])
    elif currentCondition in snowCodes:
        comment = random.choice([
            ", snowing", ", snow", ", snowfall", ", snow coming down"
        ])
    elif currentCondition in uniqueCodes:
        comment = random.choice([str(", " + currentText), ""])
    elif currentCondition in blankCodes:
        comment = ''
    else:
        # A code outside every list used to leave `comment` unbound and
        # crash below; fall back to no remark.
        comment = ''

    if 'PM' in timeStamp:
        timeStamp = "w/ low tonight of "
        tempHL = low
    else:
        timeStamp = "w/ high today of "
        tempHL = high
        # NOTE(review): the queue is purged only on this branch, as in the
        # original code - confirm that this is intended.
        q = taskqueue.Queue(city)
        q.purge()

    a = a.rstrip("\r\n")
    comment = comment.rstrip("\r\n").lower()
    forecastText = forecastText.lower()

    answer = (a + comment + '.\n' + currentTemp + '° now ' + timeStamp +
              tempHL + '°\n' + "Forecast: " + forecastText + '.')
    logging.info(answer)

    CONSUMER_KEY = config.get(city, 'CONSUMER_KEY')
    CONSUMER_SECRET = config.get(city, 'CONSUMER_SECRET')
    ACCESS_TOKEN = config.get(city, 'ACCESS_TOKEN')
    ACCESS_TOKEN_SECRET = config.get(city, 'ACCESS_TOKEN_SECRET')
    auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    api = API(auth)
    api.update_status(status=answer)
Beispiel #20
0
class Twitter(ApiInterface):
    """Twitter implementation of ``ApiInterface`` built on a tweepy ``API`` client.

    Besides posting and deleting tweets it can stream tweets to disk, score
    their sentiment and plot the scores over time.
    """

    def __init__(self) -> None:
        """Load the keys, build the API client and set the working file paths."""
        # Easy access to the keys.
        self.keys = TwitterKey()
        # OAuth handler; kept on the instance because streaming needs it too.
        auth = OAuthHandler(self.keys.consumer_pub, self.keys.consumer_sec)
        auth.set_access_token(self.keys.access_pub, self.keys.access_sec)
        self.auth = auth
        self.api = API(auth)
        # Helpers for twitter info operations.
        self.info = TwitterInfo(self.api)
        self.bio = TwitterBio(self.api)
        # Files used for real-time graphing.
        self.streamfile = os.path.join('postr', 'twitter',
                                       'twitter_stream.txt')
        self.graphfile = os.path.join('postr', 'twitter',
                                      'twitter_graphing.csv')
        self.blobfile = os.path.join('postr', 'twitter', 'twitter_blob.csv')

    def post_text(self, text: str) -> bool:
        """Post a tweet containing *text*; return True on success, False on error."""
        try:
            self.api.update_status(status=text)
            return True
        except Exception as e:
            # Exception, not BaseException: Ctrl-C and interpreter exit must
            # not be turned into a False return value.
            print(e)
            return False

    # pylint: disable=no-self-use, unused-argument
    def post_video(self, url: str, text: str) -> bool:
        """Not applicable; always returns False."""
        return False

    def post_photo(self, url: str, text: str) -> bool:
        """Post a tweet with *text* and the picture file *url*; return success."""
        try:
            self.api.update_with_media(filename=url, status=text)
            return True
        except Exception as e:
            print(e)
            return False

    def get_user_followers(self, text: str) -> List[str]:
        """Return the screen names of the followers of screen name *text*.

        Note: this is rate limited; the loop pauses one second after every
        100 followers collected.
        """
        my_followers = []

        # Use the cursor module for pagination
        followers = Cursor(self.api.followers, screen_name=text).items()
        for collected, follower in enumerate(followers, start=1):
            my_followers.append(follower.screen_name)

            # Simple rate limit for requests
            if collected % 100 == 0:
                time.sleep(1)

        return my_followers

    def remove_post(self, post_id: str) -> bool:
        """Remove the tweet with id *post_id*; return True on success."""
        try:
            self.api.destroy_status(post_id)
            return True
        except Exception as e:
            print(e)
            return False

    def stream_tweets(self, hashtags: List[str], output_filename: str) -> None:
        """Stream tweets for *hashtags* and write data into *output_filename*.

        The graph csv is re-initialised before streaming starts.
        """
        self.setup_csv()
        twitter_streamer = TwitterStreamer(self.keys, self.graphfile)
        twitter_streamer.stream_tweets(hashtags, output_filename, self.auth)
        print('done streaming')

    def setup_csv(self) -> None:
        """Initialise the graph csv file with its header row."""
        header = ['Tweet', 'Time']

        # newline='' is what the csv module expects; without it extra blank
        # rows can appear on platforms that translate line endings.
        with open(self.graphfile, 'w', newline='') as csvFile:
            csv.writer(csvFile).writerow(header)

    # pylint: disable=no-self-use, unused-argument
    def get_user_likes(self) -> int:
        """Not applicable, see helper methods in TwitterInfo class; returns -1."""
        return -1

    def read_csv_col(self, colNum: int, filename: str) -> List[str]:
        """Return column *colNum* of the csv *filename*, without the header row."""
        with open(filename, 'r', newline='') as rf:
            col = [str(row[colNum]) for row in csv.reader(rf, delimiter=',')]

        return col[1:]  # Ignore the csv header

    def analyzeSentiment(self) -> None:
        """Convert the streamed tweet text into positivity scores.

        Reads the tweet and time columns of the graph csv and writes one
        ``[time, score]`` row per tweet into the blob csv.
        """
        with open(self.blobfile, 'w', newline='') as bf:
            writer = csv.writer(bf)
            graph_data = zip(
                self.read_csv_col(0, self.graphfile),
                self.read_csv_col(1, self.graphfile),
            )

            for tweet_text, timestamp in graph_data:
                # Keep only letters and spaces before scoring.
                text = str(re.sub(r'[^a-zA-Z ]+', '', tweet_text))
                score = Twitter.polarity(text)
                writer.writerow([timestamp, score])

    @staticmethod
    def polarity(text: str) -> float:
        """Return the polarity of *text*.

        Kept as a separate method so the scoring can be swapped out easily.
        """
        return float(TextBlob(text).sentiment.polarity)

    def stream_and_graph(self, hashtags: List[str]) -> None:
        """Stream tweets for *hashtags*, then score and graph their sentiment."""
        self.stream_tweets(hashtags, self.streamfile)
        self.analyzeSentiment()
        self.graph_blob()

    def graph_blob(self) -> None:
        """Plot the scores in the blob csv against their timestamps."""
        dates = self.read_csv_col(0, self.blobfile)
        # Truncate each timestamp string to the configured precision
        dates = [d[:DATETIME_MILLISECOND_PRECISION] for d in dates]

        # Truncate off scores past a precision for easy viewing on the plot
        scores = [
            score[:SCORE_PRECISION]
            for score in self.read_csv_col(1, self.blobfile)
        ]

        plt.plot(
            dates,
            scores,
        )

        plt.ylabel('Positivity Score')
        plt.xlabel('Time')

        # beautify the x-labels
        plt.gcf().autofmt_xdate()

        plt.show()

    def update_bio(self, message: str) -> None:
        """Set the authenticated user's bio to *message*."""
        self.api.update_profile(description=message)
Beispiel #21
0
	def get(self):
		# OAuth callback handler: exchanges the verifier for an access token,
		# creates or refreshes the matching User record (including a copy of
		# the avatar in cloud storage), stores the user id in the session and
		# finally redirects to the "next" or "referer" URL from the session.
		verifier = self.request.get("oauth_verifier")
		
		if verifier:
			# Get access token
			handler = auth.OAuthHandler(config.CONSUMER_KEY, config.CONSUMER_SECRET)
			handler.set_request_token(self.session.get("request_token_key"), self.session.get("request_token_secret"))
			access_token = handler.get_access_token(verifier)
			
			if access_token:
				# Get user			
				# NOTE(review): this writes the access token to the log - confirm that is acceptable.
				logging.info("Access token: %s" %(access_token))
				user = User.all().filter("twitter_access_token_key", access_token.key).get()
				
				# Refresh when the user is unknown or was last updated more than
				# 86400 seconds (one day) ago.
				if((not user) or (user and user.updated < datetime.now() - timedelta(0,86400))):
					logging.info("Connecting to the Twitter API")
					api = API(handler)
					temp_user = api.verify_credentials()
					# Dropping "_normal" from the URL - presumably selects the full-size image; TODO confirm.
					temp_image = urlfetch.Fetch(str(temp_user.profile_image_url).replace("_normal", "")).content

					# Transform image into .PNG
					image_manager = images.Image(image_data=temp_image)
					# NOTE(review): a 360 degree rotation looks like a no-op transform used only so
					# execute_transforms() has something to run - confirm.
					image_manager.rotate(360)
					temp_png = image_manager.execute_transforms()
					logging.info("Encoded into .PNG")

					# Save or update image in Cloud storage
					filename = config.FOLDER + "/" + str(temp_user.id)
					gcs_file = gcs.open(filename,'w',content_type="image/png",options={"x-goog-acl":"public-read"})
					gcs_file.write(temp_png)
					gcs_file.close()
					logging.info("Image saved to Google Cloud Storage")

					# Get avatar
					blob_filename = "/gs" + filename
					blobkey = blobstore.create_gs_key(blob_filename)
					temp_avatar = str(images.get_serving_url(blobkey))
								
					if not user:
						logging.info("User did not exist")

						user = User(
							twitter_id = str(temp_user.id),
							twitter_access_token_key = str(access_token.key),
							twitter_access_token_secret = str(access_token.secret),
							username = str(temp_user.screen_name).lower(),
							name = temp_user.name,
							bio = temp_user.description,
							avatar = temp_avatar,
						)
						
					else:
						logging.info("User had to be updated")

						user.twitter_id = str(temp_user.id)
						user.twitter_access_token_key = str(access_token.key)
						user.twitter_access_token_secret = str(access_token.secret)
						user.username = str(temp_user.screen_name).lower()
						user.name = temp_user.name
						user.bio = temp_user.description
						user.avatar = temp_avatar
											
					user.put()
					logging.info("User @%s saved in datastore"%(user.username))
				
				# Save user in session
				self.session["id"] = user.key().id()
				
			else:
				logging.error("No access token from Twitter")
				print "Error"
		else:
			logging.error("No verifier")
			print "Error"
		
		# Redirect users to the page they came from or the page they're supposed to head to
		# ("next" takes precedence over "referer"; the error branches above also end up here).
		next = self.session.get("next")
		redirect = self.session.get("referer")
		if next:
			redirect = next
		self.redirect(str(redirect))
Beispiel #22
0
 def __init__(self, api=None):
     """Store the API client (a default ``API()`` if none is given) and the counters.

     ``n`` starts at 0 and ``m`` is set to ``MAX_RECORDS_TO_PROCESS``.
     """
     self.api = api if api else API()
     self.n, self.m = 0, MAX_RECORDS_TO_PROCESS
Beispiel #23
0
 def on_connect():
     """Post a mock tweet through an API client built from ``self.auth``."""
     client = API(self.auth)
     client.update_status(mock_tweet())
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
"""
Tweepy Twitter API library
"""
__version__ = '3.0'
__author__ = 'Joshua Roesslein'
__license__ = 'MIT'

from tweepy.models import Status, User, DirectMessage, Friendship, SavedSearch, SearchResults, ModelFactory, Category
from tweepy.error import TweepError
from tweepy.api import API
from tweepy.cache import Cache, MemoryCache, FileCache
from tweepy.auth import OAuthHandler, AppAuthHandler
from tweepy.streaming import Stream, StreamListener
from tweepy.cursor import Cursor

# Global, unauthenticated instance of API
api = API()


def debug(enable=True, level=1):
    """Turn HTTP connection debug output on or off.

    Args:
        enable: When true, set the connection debug level to *level*;
            when false, set it to 0 (off).
        level: Value assigned to ``HTTPConnection.debuglevel`` when
            debugging is enabled.
    """
    try:
        import httplib  # Python 2 module name
    except ImportError:
        import http.client as httplib  # Python 3 module name

    # Honour `enable`: previously the level was applied even for
    # debug(False), so debugging could never be switched off.
    httplib.HTTPConnection.debuglevel = level if enable else 0
            forward = []
            backward = []
            #outfile = sys.argv[1]
            #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza')
            auth = tweepy.OAuthHandler(
                "xg2hLKvf1nxw1TUALvx5xA",
                "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM")
            auth.set_access_token(
                "174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1",
                "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")
            twitterApi = API(auth_handler=auth,
                             host='api.twitter.com',
                             search_host='search.twitter.com',
                             cache=FileCache("cache", timeout=-1),
                             secure=False,
                             api_root='/1',
                             search_root='',
                             retry_count=0,
                             retry_delay=0,
                             retry_errors=None,
                             parser=None)

            #username1, username2,listUsernames = readFile(outfile)
            user1 = twitterApi.get_user(sys.argv[1])  #@UndefinedVariable
            user2 = twitterApi.get_user(sys.argv[2])  #@UndefinedVariable

            forward.append({
                "obj": user1,
                "cursor": -1,
                "friends": [],
                "cursor_obj": -1,
Beispiel #26
0
 def setUp(self):
     """Create the auth handler and an API client that uses it."""
     handler = create_auth()
     self.auth = handler
     self.api = API(handler)
Beispiel #27
0
	def __init__(self, auth):
		"""Keep the auth handler and build an API client from it."""
		client = API(auth)
		self.auth, self.api = auth, client
Beispiel #28
0
    def __init__(self, api=None):
        """Store the API client and the ``'Discord'`` entry of ``data_json``.

        A default ``API()`` client is created when no truthy ``api`` is given.
        """
        self.api = api if api else API()

        discord_data = data_json['Discord']
        self.data_d = discord_data
Beispiel #29
0
def go(collection_type, project_id, collector_id, rawdir, logdir):
    """Run the collector control loop for one project/collector pair.

    Loads the project and collector records through ``db``, marks the
    collector active in the project's config collection, sets up a daily
    rotating log file, and then polls the collector's ``run`` / ``collect`` /
    ``update`` flags, starting or stopping a ``ToolkitStream`` accordingly.
    The loop ends once the ``run`` flag is falsy; the collector is then
    marked inactive.

    Args:
        collection_type: One of ``'track'``, ``'follow'`` or ``'none'``;
            selects which ``stream.filter`` arguments are used.
        project_id: Project identifier handed to the ``db`` helpers.
        collector_id: Collector identifier; passed to the ``db`` helpers,
            wrapped in ``ObjectId`` for config updates and concatenated into
            the log file name.
        rawdir: Directory for the tweets output files (created if missing).
        logdir: Directory in which the collector log file is written.

    The process exits via ``sys.exit`` when ``collection_type`` is invalid
    or when the project/collector lookup reports a falsy ``status``.
    """
    if collection_type not in ['track', 'follow', 'none']:
        print("ThreadedCollector accepts inputs 'track', 'follow', or 'none'.")
        print('Exiting with invalid params...')
        sys.exit()
    else:
        # Grab collector & project details from DB
        project = db.get_project_detail(project_id)
        resp = db.get_collector_detail(project_id, collector_id)

        if project['status'] and resp['status']:
            collector = resp['collector']
            configdb = project['project_config_db']
            project_config_db = db.connection[configdb]
            project_config_db = project_config_db.config
            collector_name = collector['collector_name']
            project_name = project['project_name']
        else:
            # Without valid records none of the names used below exist, so
            # report the problem and stop instead of failing later on an
            # unbound variable.
            print('Invalid project account & collector. Try again!')
            sys.exit()

    # module_config = project_config_db.find_one({'module': 'twitter'})

    # Reference for controller if script is active or not.
    project_config_db.update({'_id': ObjectId(collector_id)},
                             {'$set': {
                                 'active': 1
                             }})

    Config = configparser.ConfigParser()
    Config.read(PLATFORM_CONFIG_FILE)

    # Creates logger w/ level INFO
    logger = logging.getLogger(collector_name)
    logger.setLevel(logging.INFO)
    # Creates rotating file handler w/ level INFO
    fh = logging.handlers.TimedRotatingFileHandler(
        logdir + '/' + project_name + '-' + collector_name + '-' +
        collection_type + '-collector-log-' + collector_id + '.out', 'D', 1,
        30, None, False, False)
    fh.setLevel(logging.INFO)
    # Creates formatter and applies to rotating handler
    log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    datefmt = '%m-%d %H:%M'
    formatter = logging.Formatter(log_format, datefmt)
    fh.setFormatter(formatter)
    # Finishes by adding the rotating, formatted handler
    logger.addHandler(fh)

    # Sets current date as starting point
    tmpDate = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    logger.info('Starting collection system at %s' % tmpDate)
    logger.info('Collector name: %s' % collector_name)

    # Grabs tweets out file info from config
    # TODO - move this info to Mongo
    tweetsOutFilePath = rawdir + '/'
    if not os.path.exists(tweetsOutFilePath):
        os.makedirs(tweetsOutFilePath)
    # ``raw`` is keyword-only in Python 3's configparser; passing it as a
    # third positional argument raises TypeError.
    # NOTE(review): if the stored date format contains '%' directives,
    # interpolation may reject it and raw=True may be needed - confirm
    # against the platform config file.
    tweetsOutFileDateFrmt = Config.get('files', 'tweets_file_date_frmt',
                                       raw=False)
    tweetsOutFile = Config.get('files', 'tweets_file', raw=False)

    # NOTE - proper naming for api_auth dictionary from front_end
    oauth_info = collector['api_auth']

    consumerKey = oauth_info['consumer_key']
    consumerSecret = oauth_info['consumer_secret']
    accessToken = oauth_info['access_token']
    accessTokenSecret = oauth_info['access_token_secret']

    # Authenticates via app info
    auth = OAuthHandler(consumerKey, consumerSecret)
    auth.set_access_token(accessToken, accessTokenSecret)

    # Sets Mongo collection; sets rate_limitng & error counts to 0
    if 'stream_limit_loss' not in collector:
        project_config_db.update(
            {'_id': ObjectId(collector_id)},
            {'$set': {
                'stream_limit_loss': {
                    'counts': [],
                    'total': 0
                }
            }})

    if 'rate_limit_count' not in collector:
        project_config_db.update({'_id': ObjectId(collector_id)},
                                 {'$set': {
                                     'rate_limit_count': 0
                                 }})

    if 'error_code' not in collector:
        project_config_db.update({"_id": ObjectId(collector_id)},
                                 {'$set': {
                                     'error_code': 0
                                 }})

    runCollector = collector['collector']['run']

    if runCollector:
        print('Starting process w/ start signal %d' % runCollector)
        logger.info('Starting process w/ start signal %d' % runCollector)
    collectingData = False

    i = 0
    myThreadCounter = 0
    runLoopSleep = 0
    # Defaults so the loop body can still evaluate the flags when the very
    # first poll of the collector record fails.
    collectSignal = 0
    updateSignal = 0

    while runCollector:
        i += 1

        # Finds Mongo collection & grabs signal info
        # If Mongo is offline throws an exception and continues
        exception = None
        try:
            resp = db.get_collector_detail(project_id, collector_id)
            collector = resp['collector']
            flags = collector['collector']
            runCollector = flags['run']
            collectSignal = flags['collect']
            updateSignal = flags['update']
        except Exception as err:
            # Python 3 unbinds the ``as`` target when the handler ends, so
            # copy it to a name that survives until the back-off check at
            # the bottom of the loop.
            exception = err
            logger.info('Mongo connection refused with exception: %s' %
                        exception)

        # Collection process is running, and:
        # A) An update has been triggered -OR-
        # B) The collection signal is not set -OR-
        # C) Run signal is not set
        if collectingData and (updateSignal or not collectSignal
                               or not runCollector):
            # Update has been triggered
            if updateSignal:
                logger.info(
                    'MAIN: received UPDATE signal. Attempting to stop collection thread'
                )
                resp = db.set_collector_status(project_id,
                                               collector_id,
                                               collector_status=1)
            # Collection thread triggered to stop
            if not collectSignal:
                logger.info(
                    'MAIN: received STOP signal. Attempting to stop collection thread'
                )
            # Entire process triggered to stop
            if not runCollector:
                logger.info(
                    'MAIN: received EXIT signal. Attempting to stop collection thread'
                )
                resp = db.set_collector_status(project_id,
                                               collector_id,
                                               collector_status=0)
                collectSignal = 0

            # Send stream disconnect signal, kills thread
            stream.disconnect()
            wait_count = 0
            # NOTE(review): ``e`` and ``sleep`` are not defined in this
            # function; presumably module-level names - confirm.
            while e.isSet() is False:
                wait_count += 1
                print('%d) Waiting on collection thread shutdown' % wait_count)
                sleep(wait_count)

            collectingData = False

            logger.info('COLLECTION THREAD: stream stopped after %d tweets' %
                        l.tweet_count)
            logger.info('COLLECTION THREAD: collected %d error tweets' %
                        l.delete_count)
            print('COLLECTION THREAD: collected %d error tweets' %
                  l.delete_count)
            logger.info(
                'COLLECTION THREAD: lost %d tweets to stream rate limit' %
                l.limit_count)
            print('COLLECTION THREAD: lost %d tweets to stream rate limit' %
                  l.limit_count)
            print('COLLECTION THREAD: stream stopped after %d tweets' %
                  l.tweet_count)

            if not l.error_code == 0:
                resp = db.set_collector_status(project_id,
                                               collector_id,
                                               collector_status=0)
                project_config_db.update(
                    {"_id": ObjectId(collector_id)},
                    {'$set': {
                        'error_code': l.error_code
                    }})

            if not l.limit_count == 0:
                project_config_db.update(
                    {'_id': ObjectId(collector_id)},
                    {'$set': {
                        'stream_limit_loss.total': l.limit_count
                    }})

            if not l.rate_limit_count == 0:
                project_config_db.update({'_id': ObjectId(collector_id)},
                                         {'$set': {
                                             'rate_limit_count': 0
                                         }})

        # Collection has been signaled & main program thread is running
        # TODO - Check Mongo for handle:ID pairs
        # Only call for new pairs
        if collectSignal and (threading.activeCount() == 1):
            # Names collection thread & adds to counter
            myThreadCounter += 1
            myThreadName = 'collector-' + collection_type + '%s' % myThreadCounter

            termsList = collector['terms_list']
            if termsList:
                print('Terms list length: ' + str(len(termsList)))

                # Grab IDs for follow stream
                if collection_type == 'follow':
                    # TODO - Update Mongo terms w/ set for collect status 0 or 1
                    # # Updates current stored handles to collect 0 if no longer listed in terms file
                    # stored_terms = doc['termsList']
                    # for user in stored_terms:
                    #     if user['handle'] not in termsList:
                    #         user_id = user['id']
                    #         mongo_config.update({'module': 'collector-follow'},
                    #             {'$pull': {'termsList': {'handle': user['handle']}}})
                    #         mongo_config.update({'module': 'collecting-follow'},
                    #             {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}})
                    #
                    # # Loops thru current stored handles and adds list if both:
                    # #   A) Value isn't set to None (not valid OR no longer in use)
                    # all_stored_handles = [user['handle'] for user in stored_terms]
                    # stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']]
                    #
                    # print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles)

                    # Loop thru & query (except handles that have been stored)
                    print('MAIN: Querying Twitter API for handle:id pairs...')
                    logger.info(
                        'MAIN: Querying Twitter API for handle:id pairs...')
                    # Initiates REST API connection
                    twitter_api = API(auth_handler=auth)
                    failed_handles = []
                    success_handles = []
                    # Loops thru user-given terms list
                    for item in termsList:
                        term = item['term']
                        # If term already has a valid ID, nothing to look up
                        if item['id'] is not None:
                            continue
                        # Queries the Twitter API for the ID value of the handle
                        try:
                            user = twitter_api.get_user(screen_name=term)
                        except TweepError as tweepy_exception:
                            code = tweepy_exception.args[0][0]['code']
                            # Rate limited for 15 minutes w/ code 88
                            # NOTE(review): the handle that hit the limit is
                            # not retried after the sleep - confirm intended.
                            if code == 88:
                                print(
                                    'MAIN: User ID grab rate limited. Sleeping for 15 minutes.'
                                )
                                logger.exception(
                                    'MAIN: User ID grab rate limited. Sleeping for 15 minutes.'
                                )
                                time.sleep(900)
                            # Handle doesn't exist, added to Mongo as None
                            elif code == 34:
                                print(
                                    'MAIN: User w/ handle %s does not exist.'
                                    % term)
                                logger.exception(
                                    'MAIN: User w/ handle %s does not exist.'
                                    % term)
                                item['collect'] = 0
                                item['id'] = None
                                failed_handles.append(term)
                        # Success - handle:ID pair stored in Mongo
                        else:
                            user_id = user._json['id_str']
                            item['id'] = user_id
                            success_handles.append(term)

                    print('MAIN: Collected %d new ids for follow stream.' %
                          len(success_handles))
                    logger.info(
                        'MAIN: Collected %d new ids for follow stream.' %
                        len(success_handles))
                    print('MAIN: %d handles failed to be found.' %
                          len(failed_handles))
                    logger.info('MAIN: %d handles failed to be found.' %
                                len(failed_handles))
                    logger.info(failed_handles)
                    print(failed_handles)
                    print(
                        'MAIN: Grabbing full list of follow stream IDs from Mongo.'
                    )
                    logger.info(
                        'MAIN: Grabbing full list of follow stream IDs from Mongo.'
                    )

                    # Updates term list with follow values
                    project_config_db.update(
                        {'_id': ObjectId(collector_id)},
                        {'$set': {
                            'terms_list': termsList
                        }})

                    # Loops thru current stored handles and adds to list if:
                    #   A) Value isn't set to None (not valid OR no longer in use)
                    ids = [
                        item['id'] for item in termsList
                        if item['id'] and item['collect']
                    ]
                    noncoll = [
                        item['term'] for item in termsList
                        if not item['collect']
                    ]
                    termsList = ids
                else:
                    terms = [
                        item['term'] for item in termsList if item['collect']
                    ]
                    noncoll = [
                        item['term'] for item in termsList
                        if not item['collect']
                    ]
                    termsList = terms

                print('Terms List: ')
                print(termsList)
                print('')
                print('Not collecting for: ')
                print(noncoll)
                print('')

                logger.info('Terms list: %s' % str(termsList).strip('[]'))
                logger.info('Not collecting for: %s' %
                            str(noncoll).strip('[]'))

            print(
                'COLLECTION THREAD: Initializing Tweepy listener instance...')
            logger.info(
                'COLLECTION THREAD: Initializing Tweepy listener instance...')
            l = fileOutListener(tweetsOutFilePath, tweetsOutFileDateFrmt,
                                tweetsOutFile, logger, collection_type,
                                project_id, collector_id)

            print('TOOLKIT STREAM: Initializing Tweepy stream listener...')
            logger.info(
                'TOOLKIT STREAM: Initializing Tweepy stream listener...')

            # Initiates async stream via Tweepy, which handles the threading
            # TODO - location & language

            languages = collector['languages']
            location = collector['location']

            if languages:
                print('%s language codes found!' % len(languages))
            if location:
                print('Location points found!')
                # Convert in place; a dedicated index name keeps the outer
                # iteration counter ``i`` intact.
                for point_idx, point in enumerate(location):
                    location[point_idx] = float(point)

            stream = ToolkitStream(auth,
                                   l,
                                   logger,
                                   project_id,
                                   collector_id,
                                   retry_count=100)
            if collection_type == 'track':
                stream.filter(track=termsList,
                              languages=languages,
                              locations=location,
                              is_async=True)
            elif collection_type == 'follow':
                stream.filter(follow=termsList,
                              languages=languages,
                              locations=location,
                              is_async=True)
            elif collection_type == 'none':
                stream.filter(locations=location,
                              languages=languages,
                              is_async=True)
            else:
                sys.exit('ERROR: Unrecognized stream filter.')

            collectingData = True
            print('MAIN: Collection thread started (%s)' % myThreadName)
            logger.info('MAIN: Collection thread started (%s)' % myThreadName)

        # if threading.activeCount() == 1:
        #    print "MAIN: %d iteration with no collection thread running" % i
        # else:
        #    print "MAIN: %d iteration with collection thread running (%d)" % (i, threading.activeCount())

        # Incrementally delays loop if Mongo is offline, otherwise 2 seconds
        max_sleep_time = 1800
        if exception:
            if runLoopSleep < max_sleep_time:
                runLoopSleep += 2
            else:
                runLoopSleep = max_sleep_time
            print("Exception caught, sleeping for: %d" % runLoopSleep)
            time.sleep(runLoopSleep)
        else:
            time.sleep(2)

    logger.info('Exiting Collection Program...')
    print('Exiting Collection Program...')

    # Reference for controller if script is active or not.
    project_config_db.update({'_id': ObjectId(collector_id)},
                             {'$set': {
                                 'active': 0
                             }})
 def __init__(self, auth):
     """Remember the auth handler and create an API client bound to it."""
     self.auth = auth
     self.api = API(auth)
        'Please specify the key via environment var or in source code')


# Path of the CSV to load, as returned by err_for_input().
input_data_path = err_for_input()
outfile = 'outfile.csv'

pd_df = pandas.read_csv(input_data_path)
print(pd_df.dtypes)
e = os.environ

# dict.get() evaluates its default argument even when the key is present,
# so err_for_key() is only called here when the variable is actually missing.
auth = tweepy.auth.OAuthHandler(
    consumer_key=(e['CONSUMER_KEY']
                  if 'CONSUMER_KEY' in e else err_for_key()),
    consumer_secret=(e['CONSUMER_SECRET']
                     if 'CONSUMER_SECRET' in e else err_for_key()))

client = API(auth_handler=auth)


def get_chunks_of_n(n):
    # If I was doing this again i would have used more_itertools.chunked, but this is already written
    counter = count()
    rows = pd_df.iterrows()
    res = []
    count_val = 0
    while count_val < len(pd_df):
        try:
            count_val = next(counter)
            if count_val > 0 and count_val % n == 0:
                yield res
                counter = count()
                res = []
Beispiel #32
0
        logger.info('Streamに接続しました')
        return

    def on_disconnect(self, notice):
        """Log the code of the disconnect notice at INFO level."""
        message = 'Streamから切断されました:' + str(notice.code)
        logger.info(message)

    def on_limit(self, track):
        """Log the received limit value at WARNING level."""
        message = '受信リミットが発生しました:' + str(track)
        logger.warning(message)

    def on_timeout(self):
        """Log a timeout at INFO level and return True."""
        message = 'タイムアウト'
        logger.info(message)
        return True

    def on_warning(self, notice):
        """Log the message of the warning notice at WARNING level."""
        message = '警告メッセージ:' + str(notice.message)
        logger.warning(message)

    def on_exception(self, exception):
        """Log the exception text at ERROR level and return True."""
        message = '例外エラー:' + str(exception)
        logger.error(message)
        return True


# main: script entry point - authenticate, then start the user stream.
if __name__ == '__main__':
    # OAuth handler obtained from get_oauth().
    auth = get_oauth()
    # REST client built from the same handler; not referenced again in this
    # block.
    api = API(auth)
    # Attach a Listener instance to the stream and start the user stream.
    stream = Stream(auth, Listener(), secure=True)
    stream.userstream()
Beispiel #33
0
 def __init__(self, streambot, api=None):
     """Set up the listener state.

     Uses the given API client, or a fresh ``API()`` when none (or a falsy
     value) is passed, and keeps a reference to ``streambot``.
     """
     self.api = api if api else API()
     # Reference kept so that streambot methods can be called later.
     self.streambot = streambot
     self.tw_bot_id = 841013993602863104
     self.ignored_users = list()
 def __init__(self, api=None):
     """Store the API client (a fresh ``API()`` if none is given) and set
     ``n`` to 0 and ``m`` to 20."""
     if api:
         self.api = api
     else:
         self.api = API()
     self.n, self.m = 0, 20
                            {'$pull': {'termsList': {'handle': user['handle']}}})
                        mongo_config.update({'module': 'collecting-follow'},
                            {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}})

                # Loops thru current stored handles and adds list if both:
                #   A) Value isn't set to None (not valid OR no longer in use)
                all_stored_handles = [user['handle'] for user in stored_terms]
                stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']]

                print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles)

                # Loop thru & query (except handles that have been stored)
                print 'MAIN: Querying Twitter API for new handle:id pairs...'
                logger.info('MAIN: Querying Twitter API for new handle:id pairs...')
                # Initiates REST API connection
                twitter_api = API(auth_handler=auth)
                failed_handles = []
                success_handles = []
                # Loops thru user-given terms list
                for handle in termsList:
                    # If handle already stored, no need to query for ID
                    if handle in stored_handles:
                        pass
                    # Queries the Twitter API for the ID value of the handle
                    else:
                        try:
                            user = twitter_api.get_user(screen_name=handle)
                        except TweepError as tweepy_exception:
                            error_message = tweepy_exception.args[0][0]['message']
                            code = tweepy_exception.args[0][0]['code']
                            # Rate limited for 15 minutes w/ code 88
Beispiel #36
0
def go(collection_type, project_id, collector_id, rawdir, logdir):
    if collection_type not in ['track', 'follow', 'none']:
        print "ThreadedCollector accepts inputs 'track', 'follow', or 'none'."
        print 'Exiting with invalid params...'
        sys.exit()
    else:
        # Grab collector & project details from DB
        project = db.get_project_detail(project_id)
        resp = db.get_collector_detail(project_id, collector_id)

        if project['status'] and resp['status']:
            collector = resp['collector']
            configdb = project['project_config_db']
            project_config_db = db.connection[configdb]
            project_config_db = project_config_db.config
            collector_name = collector['collector_name']
            project_name = project['project_name']
        else:
            'Invalid project account & collector. Try again!'

    # module_config = project_config_db.find_one({'module': 'twitter'})

    # Reference for controller if script is active or not.
    project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'active': 1}})

    Config = ConfigParser.ConfigParser()
    Config.read(PLATFORM_CONFIG_FILE)

    # Creates logger w/ level INFO
    logger = logging.getLogger(collector_name)
    logger.setLevel(logging.INFO)
    # Creates rotating file handler w/ level INFO
    fh = logging.handlers.TimedRotatingFileHandler(logdir + '/' + project_name + '-' + collector_name + '-' + collection_type + '-collector-log-' + collector_id + '.out', 'D', 1, 30, None, False, False)
    fh.setLevel(logging.INFO)
    # Creates formatter and applies to rotating handler
    format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    datefmt = '%m-%d %H:%M'
    formatter = logging.Formatter(format, datefmt)
    fh.setFormatter(formatter)
    # Finishes by adding the rotating, formatted handler
    logger.addHandler(fh)

    # Sets current date as starting point
    tmpDate = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    logger.info('Starting collection system at %s' % tmpDate)
    logger.info('Collector name: %s' % collector_name)

    # Grabs tweets out file info from config
    # TODO - move this info to Mongo
    tweetsOutFilePath = rawdir + '/'
    if not os.path.exists(tweetsOutFilePath):
        os.makedirs(tweetsOutFilePath)
    tweetsOutFileDateFrmt = Config.get('files', 'tweets_file_date_frmt', 0)
    tweetsOutFile = Config.get('files', 'tweets_file', 0)

    # NOTE - proper naming for api_auth dictionary from front_end
    oauth_info = collector['api_auth']

    consumerKey = oauth_info['consumer_key']
    consumerSecret = oauth_info['consumer_secret']
    accessToken = oauth_info['access_token']
    accessTokenSecret = oauth_info['access_token_secret']

    # Authenticates via app info
    auth = OAuthHandler(consumerKey, consumerSecret)
    auth.set_access_token(accessToken, accessTokenSecret)

    # Sets Mongo collection; sets rate_limitng & error counts to 0
    if 'stream_limit_loss' not in collector:
        project_config_db.update({'_id': ObjectId(collector_id)}, {'$set' : { 'stream_limit_loss': { 'counts': [], 'total': 0 }}})

    if 'rate_limit_count' not in collector:
        project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'rate_limit_count': 0}})

    if 'error_code' not in collector:
        project_config_db.update({"_id" : ObjectId(collector_id)}, {'$set' : {'error_code': 0}})

    runCollector = collector['collector']['run']

    if runCollector:
        print 'Starting process w/ start signal %d' % runCollector
        logger.info('Starting process w/ start signal %d' % runCollector)
    collectingData = False

    i = 0
    myThreadCounter = 0
    runLoopSleep = 0

    while runCollector:
        i += 1

        # Finds Mongo collection & grabs signal info
        # If Mongo is offline throws an acception and continues
        exception = None
        try:
            resp = db.get_collector_detail(project_id, collector_id)
            collector = resp['collector']
            flags = collector['collector']
            runCollector = flags['run']
            collectSignal = flags['collect']
            updateSignal = flags['update']
        except Exception, exception:
            logger.info('Mongo connection refused with exception: %s' % exception)

        """
        Collection process is running, and:
        A) An update has been triggered -OR-
        B) The collection signal is not set -OR-
        C) Run signal is not set
        """
        if collectingData and (updateSignal or not collectSignal or not runCollector):
            # Update has been triggered
            if updateSignal:
                logger.info('MAIN: received UPDATE signal. Attempting to stop collection thread')
                resp = db.set_collector_status(project_id, collector_id, collector_status=1)
            # Collection thread triggered to stop
            if not collectSignal:
                logger.info('MAIN: received STOP signal. Attempting to stop collection thread')
            # Entire process trigerred to stop
            if not runCollector:
                logger.info('MAIN: received EXIT signal. Attempting to stop collection thread')
                resp = db.set_collector_status(project_id, collector_id, collector_status=0)
                collectSignal = 0

            # Send stream disconnect signal, kills thread
            stream.disconnect()
            wait_count = 0
            while e.isSet() is False:
                wait_count += 1
                print '%d) Waiting on collection thread shutdown' % wait_count
                sleep(wait_count)

            collectingData = False

            logger.info('COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count)
            logger.info('COLLECTION THREAD: collected %d error tweets' % l.delete_count)
            print 'COLLECTION THREAD: collected %d error tweets' % l.delete_count
            logger.info('COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count)
            print 'COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count
            print 'COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count

            if not l.error_code == 0:
                resp = db.set_collector_status(project_id, collector_id, collector_status=0)
                project_config_db.update({"_id" : ObjectId(collector_id)}, {'$set' : {'error_code': l.error_code}})

            if not l.limit_count == 0:
                project_config_db.update({'_id': ObjectId(collector_id)}, {'$set' : { 'stream_limit_loss.total': l.limit_count}})

            if not l.rate_limit_count == 0:
                project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'rate_limit_count': 0}})

        # Collection has been signaled & main program thread is running
        # TODO - Check Mongo for handle:ID pairs
        # Only call for new pairs
        if collectSignal and (threading.activeCount() == 1):
            # Names collection thread & adds to counter
            myThreadCounter += 1
            myThreadName = 'collector-' + collection_type + '%s' % myThreadCounter

            termsList = collector['terms_list']
            if termsList:
                print 'Terms list length: ' + str(len(termsList))

                # Grab IDs for follow stream
                if collection_type == 'follow':
                    """
                    TODO - Update Mongo terms w/ set for collect status 0 or 1
                    # Updates current stored handles to collect 0 if no longer listed in terms file
                    stored_terms = doc['termsList']
                    for user in stored_terms:
                        if user['handle'] not in termsList:
                            user_id = user['id']
                            mongo_config.update({'module': 'collector-follow'},
                                {'$pull': {'termsList': {'handle': user['handle']}}})
                            mongo_config.update({'module': 'collecting-follow'},
                                {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}})

                    # Loops thru current stored handles and adds list if both:
                    #   A) Value isn't set to None (not valid OR no longer in use)
                    all_stored_handles = [user['handle'] for user in stored_terms]
                    stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']]

                    print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles)
                    """

                    # Loop thru & query (except handles that have been stored)
                    print 'MAIN: Querying Twitter API for handle:id pairs...'
                    logger.info('MAIN: Querying Twitter API for handle:id pairs...')
                    # Initiates REST API connection
                    twitter_api = API(auth_handler=auth)
                    failed_handles = []
                    success_handles = []
                    # Loops thru user-given terms list
                    for item in termsList:
                        term = item['term']
                        # If term already has a valid ID, pass
                        if item['id'] is not None:
                            pass
                        # Queries the Twitter API for the ID value of the handle
                        else:
                            try:
                                user = twitter_api.get_user(screen_name=term)
                            except TweepError as tweepy_exception:
                                error_message = tweepy_exception.args[0][0]['message']
                                code = tweepy_exception.args[0][0]['code']
                                # Rate limited for 15 minutes w/ code 88
                                if code == 88:
                                    print 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.'
                                    logger.exception('MAIN: User ID grab rate limited. Sleeping for 15 minutes.')
                                    time.sleep(900)
                                # Handle doesn't exist, added to Mongo as None
                                elif code == 34:
                                    print 'MAIN: User w/ handle %s does not exist.' % term
                                    logger.exception('MAIN: User w/ handle %s does not exist.' % term)
                                    item['collect'] = 0
                                    item['id'] = None
                                    failed_handles.append(term)
                            # Success - handle:ID pair stored in Mongo
                            else:
                                user_id = user._json['id_str']
                                item['id'] = user_id
                                success_handles.append(term)

                    print 'MAIN: Collected %d new ids for follow stream.' % len(success_handles)
                    logger.info('MAIN: Collected %d new ids for follow stream.' % len(success_handles))
                    print 'MAIN: %d handles failed to be found.' % len(failed_handles)
                    logger.info('MAIN: %d handles failed to be found.' % len(failed_handles))
                    logger.info(failed_handles)
                    print failed_handles
                    print 'MAIN: Grabbing full list of follow stream IDs from Mongo.'
                    logger.info('MAIN: Grabbing full list of follow stream IDs from Mongo.')

                    # Updates term list with follow values
                    project_config_db.update({'_id': ObjectId(collector_id)},
                        {'$set': {'terms_list': termsList}})

                    # Loops thru current stored handles and adds to list if:
                    #   A) Value isn't set to None (not valid OR no longer in use)
                    ids = [item['id'] for item in termsList if item['id'] and item['collect']]
                    noncoll = [item['term'] for item in termsList if not item['collect']]
                    termsList = ids
                else:
                    terms = [item['term'] for item in termsList if item['collect']]
                    noncoll = [item['term'] for item in termsList if not item['collect']]
                    termsList = terms

                print 'Terms List: '
                print termsList
                print ''
                print 'Not collecting for: '
                print noncoll
                print ''

                logger.info('Terms list: %s' % str(termsList).strip('[]'))
                logger.info('Not collecting for: %s' % str(noncoll).strip('[]'))

            print 'COLLECTION THREAD: Initializing Tweepy listener instance...'
            logger.info('COLLECTION THREAD: Initializing Tweepy listener instance...')
            l = fileOutListener(tweetsOutFilePath, tweetsOutFileDateFrmt, tweetsOutFile, logger, collection_type, project_id, collector_id)

            print 'TOOLKIT STREAM: Initializing Tweepy stream listener...'
            logger.info('TOOLKIT STREAM: Initializing Tweepy stream listener...')

            # Initiates async stream via Tweepy, which handles the threading
            # TODO - location & language

            languages = collector['languages']
            location = collector['location']

            if languages:
                print '%s language codes found!' % len(languages)
            if location:
                print 'Location points found!'
                for i in range(len(location)):
                    location[i] = float(location[i])

            stream = ToolkitStream(auth, l, logger, project_id, collector_id, retry_count=100)
            if collection_type == 'track':
                stream.filter(track=termsList, languages=languages, locations=location, async=True)
            elif collection_type == 'follow':
                stream.filter(follow=termsList, languages=languages, locations=location, async=True)
            elif collection_type == 'none':
                stream.filter(locations=location, languages=languages, async=True)
            else:
                sys.exit('ERROR: Unrecognized stream filter.')

            collectingData = True
            print 'MAIN: Collection thread started (%s)' % myThreadName
            logger.info('MAIN: Collection thread started (%s)' % myThreadName)


        #if threading.activeCount() == 1:
        #    print "MAIN: %d iteration with no collection thread running" % i
        #else:
        #    print "MAIN: %d iteration with collection thread running (%d)" % (i, threading.activeCount())

        # Incrementally delays loop if Mongo is offline, otherwise 2 seconds
        max_sleep_time = 1800
        if exception:
            if runLoopSleep < max_sleep_time:
                runLoopSleep += 2
            else:
                runLoopSleep = max_sleep_time
            print "Exception caught, sleeping for: %d" % runLoopSleep
            time.sleep(runLoopSleep)
        else:
            time.sleep( 2 )
 def __init__(self):
     """Create the Twitter API client, load ``config.ini`` and build the Redis client.

     The Redis host and port are read from the ``[DB]`` section of the
     configuration file.
     """
     self.api = API()

     # os.path.abspath resolves the relative name against the current
     # working directory, so the file is looked up where the process runs.
     config_path = os.path.abspath('config.ini')
     self.config = ConfigParser.ConfigParser()
     self.config.read(config_path)

     db_host = self.config.get('DB', 'host')
     db_port = self.config.get('DB', 'port')
     self.r = redis.StrictRedis(host=db_host, port=db_port)
Beispiel #38
0

if __name__ == "__main__":
    # Loop forever: authenticate, pick a random numeric user id and fetch
    # that user's timeline, friend ids and follower ids through a caching
    # API client.  Any failure is printed and the loop simply starts over.
    # NOTE(review): the OAuth credentials are hard-coded in the source;
    # they should be loaded from configuration instead.
    while True:
        try:
            auth = tweepy.OAuthHandler(
                "xg2hLKvf1nxw1TUALvx5xA",
                "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM",
            )
            auth.set_access_token(
                "174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1",
                "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k",
            )

            # Combined DB + file cache; every layer is created with timeout=-1.
            response_cache = DBFileCache(
                DBCache(timeout=-1),
                FileCache("cache", timeout=-1),
                timeout=-1,
            )
            twitterApi = API(
                auth_handler=auth,
                host="api.twitter.com",
                search_host="search.twitter.com",
                cache=response_cache,
                secure=False,
                api_root="/1",
                search_root="",
                retry_count=0,
                retry_delay=0,
                retry_errors=None,
                parser=None,
            )

            # Sample a user id uniformly from 1..1e9 and pull that user's data.
            i = random.randint(1, 1000000000)
            u = handle_func(twitterApi.get_user, user_id=i)
            tweets = u.timeline(count=100, include_rts=1)
            friends = twitterApi.friends_ids(user_id=u.id)
            followers = twitterApi.followers_ids(user_id=u.id)
        except Exception as e:
            print(e)
Beispiel #39
0
def update_reply(text, reply_id, screen_name):
    """Post ``text`` as a reply to the status ``reply_id``.

    The status body is prefixed with ``@screen_name`` followed by a space.
    """
    api = API(get_oauth())
    st = "@{0} {1}".format(str(screen_name), str(text))
    api.update_status(status=st, in_reply_to_status_id=reply_id)
Beispiel #40
0
 def setUp(self):
     """Build an authenticated API client with test-friendly retry settings."""
     self.auth = create_auth()
     self.api = API(self.auth)
     self.api.retry_count = 2

     # No delay between retries when ``use_replay`` is set, 5 otherwise.
     if use_replay:
         delay = 0
     else:
         delay = 5
     self.api.retry_delay = delay
Beispiel #41
0
def update_reply(text, reply_id, screen_name):
    """Reply to status ``reply_id`` with ``text``, addressed to ``screen_name``."""
    client = API(get_oauth())
    # Status body: "@<screen_name> <text>".
    mention = "@" + str(screen_name)
    st = " ".join((mention, str(text)))
    client.update_status(status=st, in_reply_to_status_id=reply_id)
Beispiel #42
0
 def __init__(self, api=None):
     """Keep the given API client, or build a default ``API()`` when it is falsy."""
     if not api:
         api = API()
     self.api = api
class GetTwitterData():
    """Download user timelines and follower lists and dump them to CSV files.

    The output locations come from the module-level names
    ``users_tweets_path``, ``users_friends_path`` and ``tweets_data_path``.
    CSV files are opened in binary mode and tweet text is UTF-8 encoded
    before writing, as the Python 2 ``csv`` module expects.
    """

    def __init__(self, auth):
        """Store ``auth`` and build the tweepy API client from it."""
        self.auth = auth
        self.api = API(self.auth)

    def _write_tweets_csv(self, screen_name, tweets):
        """Overwrite ``users_tweets_path`` with a header plus one row per tweet."""
        # NOTE(review): the file is opened with 'wb', so every call replaces
        # what a previous call (for another user) wrote -- confirm intended.
        out_tweets = [[screen_name, tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")]
                      for tweet in tweets]
        with open(users_tweets_path, 'wb') as f:
            writer = csv.writer(f)
            writer.writerow(["screen_name", "id", "created_at", "text"])
            writer.writerows(out_tweets)

    def get_all_tweets(self, screen_name, tweet_count):
        """Page backwards through ``screen_name``'s timeline and write it to CSV.

        ``tweet_count`` is the page size passed to ``user_timeline``.  Paging
        stops at the first empty page.  A user without tweets yields a CSV
        that contains only the header row.
        """
        alltweets = []

        # Most recent page first.
        new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count)
        alltweets.extend(new_tweets)

        # Only compute the paging cursor while the last page was non-empty;
        # indexing alltweets[-1] on an empty timeline raised IndexError.
        while new_tweets:
            # max_id is inclusive, so ask for everything older than the
            # oldest tweet seen so far to avoid duplicates.
            oldest = alltweets[-1].id - 1
            print("getting tweets before %s" % (oldest))

            new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count, max_id=oldest)
            alltweets.extend(new_tweets)

            print("...%s tweets downloaded so far" % (len(alltweets)))

        self._write_tweets_csv(screen_name, alltweets)

    def new_get_all_tweets(self, screen_name, tweet_count):
        """Fetch only the most recent ``tweet_count`` tweets and write them to CSV."""
        new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count)
        self._write_tweets_csv(screen_name, new_tweets)

    def find_friends(self, screen_name):
        """Append the followers of ``screen_name`` to ``users_friends_path``.

        Despite the name, this pages through ``followers_ids`` (50 ids per
        page), resolves each page to screen names with ``lookup_users`` and
        appends ``[screen_name, follower_id, follower_screen_name]`` rows.
        """
        print("screen_name: " + screen_name)

        pages = Cursor(self.api.followers_ids, screen_name=screen_name, count=50).pages()
        for follower_ids in pages:
            print(follower_ids)
            print("ids are: " + str(len(follower_ids)))

            # Nothing to resolve on an empty page; skip the lookup request.
            if not follower_ids:
                continue

            followers = self.api.lookup_users(user_ids=follower_ids)
            friends_list = [user.screen_name for user in followers]
            print("list of users\n")
            print(friends_list)

            # users/lookup neither preserves the requested order nor returns
            # unknown/suspended accounts, so take the id from each returned
            # user instead of pairing ids and names by position.
            friends_list_output = [[screen_name, user.id, user.screen_name]
                                   for user in followers]
            print(friends_list_output)

            # NOTE(review): the header row is appended for every page, so the
            # file ends up with repeated header lines -- kept as-is.
            with open(users_friends_path, 'ab') as f:
                writer = csv.writer(f)
                writer.writerow(["screen_name", "id", "friends"])
                writer.writerows(friends_list_output)

            time.sleep(1)

    def readfile(self):
        """Process every tweet stored (one JSON object per line) in ``tweets_data_path``.

        For each tweet's author the latest 5 tweets are written via
        ``new_get_all_tweets`` and the followers are appended via
        ``find_friends``.  Failures for one author are printed and do not
        stop the run.
        """
        tweets_data = []
        # The context manager closes the input file, which was left open before.
        with open(tweets_data_path, "r") as tweets_file:
            for line in tweets_file:
                try:
                    tweets_data.append(json.loads(line))
                except ValueError:
                    # Not valid JSON (blank or truncated line): skip it.
                    continue

        print(len(tweets_data))
        for counter, tweet in enumerate(tweets_data, 1):
            try:
                author = tweet['user']['screen_name']
            except (KeyError, TypeError):
                # A record without user information cannot be processed;
                # skip it instead of aborting the whole run.
                print("skipping record without user information")
                print(counter)
                continue

            try:
                self.new_get_all_tweets(author, 5)
            except Exception as e:
                print("error:\n")
                print(str(e))

            try:
                print(author)
                self.find_friends(author)
            except Exception as e:
                print("fail:\n")
                print(str(e))
            print(counter)
Beispiel #44
0
	def __init__(self, queue=None, api=None):
		"""Remember the message queue and the API client (default ``API()`` when falsy)."""
		if not api:
			api = API()
		self.messagequeue = queue
		self.api = api
Beispiel #45
0
# -*- coding: utf-8 -*-
'''
Created on 2011-8-27

@author: redswallow
'''

from tweepy.auth import OAuthHandler
from tweepy.api import API

# NOTE(review): application and access credentials are hard-coded here;
# they should be loaded from configuration rather than kept in the source.
consumer_key = "o2K22DnJqSG0STjRbLUA"
consumer_secret = "SV7I5YxQ8ehDCEBnKmCHYMTJW0Z0MLt3kpEdW9KhaCo"
token = "25798843-PyBwBx4AWqjUuSAm9yoKQuSvEtZQR78IEsuB7xGw"
tokenSecret = "nWhpP3g44eciBs0Db5SXQc8HJ0G53Rd2v4sAGJy3aTU"

# Build the OAuth handler and an API client bound to it.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(token, tokenSecret)
api = API(auth)

# Post a status update; the text is Chinese for "OAuth login succeeded".
api.update_status(u"oauth登录成功~~")
Beispiel #46
0
import sys,random,math,time
sys.path.append("../lib/")

from tweepy import api, error
from tweepy.cache import FileCache,DBCache, DBFileCache
from tweepy.api import API
import tweepy
from tweepy.models import *
from tweeapi import APISingleton

if __name__ == "__main__":
    # Retry until the rate-limit status has been fetched and printed once;
    # sys.exit raises SystemExit, which ``except Exception`` does not catch,
    # so a successful request ends the loop.
    # NOTE(review): the OAuth credentials are hard-coded in the source.
    while True:
        try:
            auth = tweepy.OAuthHandler(
                "xg2hLKvf1nxw1TUALvx5xA",
                "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM",
            )
            auth.set_access_token(
                "174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1",
                "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k",
            )
            twitterApi = API(
                auth_handler=auth,
                host='api.twitter.com',
                search_host='search.twitter.com',
                cache=None,
                secure=False,
                api_root='/1',
                search_root='',
                retry_count=0,
                retry_delay=0,
                retry_errors=None,
                parser=None,
            )

            ret = twitterApi.rate_limit_status()
            print(ret)
            sys.exit(0)
        except Exception as e:
            print(e)
Beispiel #47
0
 def __init__(self, api=None, interrupt=False, queue=None):
     """Store the API client, the interrupt flag and the tweet queue.

     api       -- API access to Twitter; a default ``API()`` is built when falsy.
     interrupt -- interrupt signal for ending ``stream.filter``.
     queue     -- queue the tweets are placed in.
     """
     if not api:
         api = API()
     self.api = api
     self.interrupt = interrupt
     self.queue = queue
Beispiel #48
0
def update_tweet(text):
    """Post ``text`` as a new status using the credentials from ``get_oauth()``."""
    client = API(get_oauth())
    client.update_status(status=text)
Beispiel #49
0
 def __init__(self, api=None):
     """Initialise the API client, an empty internal list and the dump-file settings."""
     if not api:
         api = API()
     self.api = api

     self.internal_list = []

     # Base file names for the two JSON dumps, and a counter starting at 0.
     self.filename_data = './Data/json_dump'
     self.filename_status = './Data_Status/json_dump'
     self.num_files_written = 0
    config.read(CONF_INI_FILE)

    default = config['locations']
    boundingbox = default[geo].split(',')
    boundingbox = [float(x) for x in boundingbox]

    if MODE == 0:
        out = rollfile(0)

    tries = 0

    l = MyStreamListener()
    #l.output = out
    auth = OAuthHandler(keys['consumer_key'], keys['consumer_secret'])
    auth.set_access_token(keys['access_key'], keys['access_secret'])
    api = API(auth, wait_on_rate_limit=False, wait_on_rate_limit_notify=False)
    #l._api = api

    stream = Stream(auth, l)
    while True:
        list_terms = [
            'kill', 'news', 'fight', 'peace', 'elect', 'terror', 'earthquake',
            'death', 'disaster', 'attack', 'major sports', 'shooting', 'crash',
            'ISIS', 'PKK'
        ]

        try:
            #stream.filter(languages = ['en'], track=list_terms)
            #http://boundingbox.klokantech.com/
            #            boundingboxes = {}
            #            boundingboxes['NYC'] = [-74.2852635,40.3161132,-73.50,40.9249936]
from tweepy import TweepError
from tweepy.api import API
from codecs import open

# Read the four OAuth credentials from the JSON configuration file.
with open('twitter_api_config.json') as f:
    apiConfig = json.load(f)

ckey, consumer_secret, access_token_key, access_token_secret = (
    apiConfig['ckey'],
    apiConfig['consumer_secret'],
    apiConfig['access_token_key'],
    apiConfig['access_token_secret'],
)

# Twitter authentication: consumer pair first, then the access token pair.
auth = OAuthHandler(ckey, consumer_secret)
auth.set_access_token(access_token_key, access_token_secret)

# Module-level API client bound to the handler above.
api = API(auth_handler=auth)


def reindex(es, newIndex,count,outFile):
    query = {
        "query": {
          "function_score":{
            "query":{
            "bool": {
            "filter": [
                    {
                        "term": {
                            "start": "2017-05-24T21:16:27.396400-04:00"
                    }
                    }
                    ]
def connect_to_api(auth):
    """Return an ``API`` client built on ``auth`` after calling ``verify_credentials``.

    NOTE(review): the result of ``verify_credentials()`` is discarded, so this
    only fails if that call raises -- confirm that is the intended check.
    """
    client = API(auth_handler=auth)
    client.verify_credentials()
    return client
# Initial values for the module-level state used by the script below
# (the main loop assigns most of these again on every pass).
forward_ds = [1]
backward_ds = [1]
forward_min = 0
backward_min = 0
backward = []
is_opt = 0
max_step = 6
if __name__ == "__main__":
    while 1:
        try:
            #outfile = sys.argv[1]
            #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') 
            auth = tweepy.OAuthHandler("xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM")
            auth.set_access_token("174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")
            twitterApi = API(auth_handler=auth,
                    host='api.twitter.com', search_host='search.twitter.com',
                     cache=FileCache("cache", timeout = -1), secure=False, api_root='/1', search_root='',
                    retry_count=0, retry_delay=0, retry_errors=None,
                    parser=None)
                        
            username1 = sys.argv[1]
            username2 = sys.argv[2]
            user1 = twitterApi.get_user(username1) #@UndefinedVariable
            user2 = twitterApi.get_user(username2) #@UndefinedVariable
            
            
            forward = []
	    forward_ds = [1]
	    backward_ds = [1]
	    forward_min = backward_min = 0
	    backward = []
	    is_opt = 0