def run_user_timeline_download():
    """Download the timelines of the first 1000 listed users to a JSON-lines file.

    Reads user ids (one per line) from ``data/top_users_to_PrEP.txt``. For
    each id it sleeps 16 minutes, requests pages 0-149 of
    ``api.user_timeline`` (20 tweets per page) and writes every tweet as one
    JSON document per line to ``data/user_timeline_tweets.json``.

    A failure for one user is reported and the loop moves on to the next
    user. At the end the ids that completed and the number of ids attempted
    are printed.
    """
    from itertools import islice

    print('downloading user-timelines...')
    api = API(auth, parser=JSONParser())

    with open('data/top_users_to_PrEP.txt') as f_in:
        # Strip the line terminator so the API gets a clean id, and drop
        # blank lines, which can never name a user.
        stripped = (line.strip() for line in islice(f_in, 1000))
        user_str_ids = [user_id for user_id in stripped if user_id]

    users = []
    pages = range(0, 150)
    with open('data/user_timeline_tweets.json', 'w') as f_out:
        for user_id in user_str_ids:
            try:
                time.sleep(60 * 16)
                for page in pages:
                    for twt in api.user_timeline(user_id, count=20, page=page):
                        f_out.write(json.dumps(twt) + '\n')
                users.append(user_id)
            except Exception as exc:
                # Best effort: keep going, but say which user failed and why.
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                print('failed to download timeline for %s: %s' % (user_id, exc))

    print('done with user-timelines...')
    print(users)
    print(len(user_str_ids))
def _get_rate_limit_status(self, key, secret):
    """Return ``rate_limit_status()`` for the given access token key/secret.

    A new OAuth handler is built from this object's consumer key and secret
    and the supplied access token pair.
    """
    handler = OAuthHandler(self.consumer_key, self.consumer_secret)
    handler.set_access_token(key, secret)
    return API(handler).rate_limit_status()
def get_username(self):
    """Return ``self.username``, looking it up and caching it when unset.

    When ``self.username`` is None the credentials are verified through the
    API and the returned user's ``screen_name`` is stored.

    Raises:
        TweepError: if ``verify_credentials()`` returns a falsy result.
    """
    if self.username is not None:
        return self.username

    verified = API(self).verify_credentials()
    if not verified:
        raise TweepError("Unable to get username, invalid oauth token!")

    self.username = verified.screen_name
    return self.username
def tweet(answer):
    """Post ``answer`` as a status update.

    The four OAuth values are read from the "auth" section of the
    module-level ``config`` object.
    """
    option_names = (
        "CONSUMER_KEY",
        "CONSUMER_SECRET",
        "ACCESS_TOKEN",
        "ACCESS_TOKEN_SECRET",
    )
    consumer_key, consumer_secret, token, token_secret = [
        config.get("auth", option) for option in option_names
    ]

    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(token, token_secret)
    API(handler).update_status(status=answer)
def get_user_id(self):
    """Return ``self.user_id``, looking it up and caching it when unset.

    When ``self.user_id`` is None the credentials are verified through the
    API; both ``self.username`` and ``self.user_id`` are then filled in from
    the returned user.

    Raises:
        TweepError: if ``verify_credentials()`` returns a falsy result.
    """
    if self.user_id is not None:
        return self.user_id

    verified = API(self).verify_credentials()
    if not verified:
        raise TweepError('Unable to get user_id,'
                         ' invalid oauth token!')

    self.username = verified.screen_name
    self.user_id = verified.id
    return self.user_id
def _testoauth(self):
    """Walk the interactive OAuth PIN flow, then post and delete a status.

    Prints the authorization URL, reads the PIN from stdin, exchanges it for
    an access token, and uses the authenticated API to create and destroy a
    status with a random suffix.
    """
    handler = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)

    # test getting access token
    authorize_url = handler.get_authorization_url()
    print('Please authorize: ' + authorize_url)
    pin = raw_input('PIN: ').strip()
    self.assert_(len(pin) > 0)
    token = handler.get_access_token(pin)
    self.assert_(token is not None)

    # build api object test using oauth
    client = API(handler)
    posted = client.update_status('test %i' % random.randint(0, 1000))
    client.destroy_status(posted.id)
def update_twitter_profile(user):
    """Copy name, URL, avatar, bio, screen name and location from Twitter.

    Looks up the Twitter account referenced by the user's profile
    (``profile.twitter_profile.twitter_id``) and, if it is found, writes its
    fields onto ``profile.user`` and ``profile`` and saves both. If the
    profile or the Twitter lookup fails, nothing is changed.

    The Twitter display name is split at the first space: the part before it
    becomes ``first_name`` and everything after it becomes ``last_name``.
    """
    client = API()
    try:
        profile = user.get_profile()
        twitter_user = client.get_user(user_id=profile.twitter_profile.twitter_id)
    except Exception:
        # Best effort: any lookup failure means "nothing to update".
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        twitter_user = None

    if not twitter_user:
        return

    # partition(" ") gives the same result as split(" ")[0] and
    # " ".join(split(" ")[1:]) with a single pass over the name.
    first_name, _, last_name = twitter_user.name.partition(" ")
    profile.user.first_name = first_name
    profile.user.last_name = last_name
    profile.user.save()

    profile.website = twitter_user.url
    profile.profile_image_url = twitter_user.profile_image_url
    profile.description = twitter_user.description
    profile.twitter_name = twitter_user.screen_name
    profile.location = twitter_user.location
    profile.save()
def scrapeThread(index):
    """Search tweets for credential/query slot `index` and store each as a JSON file.

    Authenticates with the `index`-th entries of the module-level credential
    lists, then for each of `numDays` days and each query in `queries[index]`
    iterates a search cursor and writes every tweet's `_json` to its own file
    under `outDir/<since>/<lang>/<timestamp>.json`.

    NOTE(review): the original indentation was lost; the nesting below is
    reconstructed from the statements themselves and should be confirmed.
    """
    auth = OAuthHandler(consumerKeys[index], consumerSecrets[index])
    auth.set_access_token(accessTokens[index], accessSecrets[index])
    api = API(auth)
    try:
        api.verify_credentials()
    except TweepError:
        print "Failed to authenticate - most likely reached rate limit/incorrect credentials!"
        return
    else:
        print "You have successfully logged on as: " + api.me().screen_name
    for i in range(0, numDays):
        for query in queries[index]:
            # Number of tweets stored so far for this query on this day.
            count = 0
            cursor = Cursor(api.search, q=quote(query.encode('utf-8')), lang=langs[index], since=sinces[i], until=untils[i], include_entities=True).items()
            while True:
                try:
                    tweet = cursor.next()
                    # The file name is the local time at which the tweet was stored.
                    utc = datetime.now().strftime('%Y%m%dT%H%M%S%f')
                    outPath = path.join(outDir, sinces[i], langs[index], utc + '.json')
                    with open(outPath, 'w') as output:
                        output.write(dumps(tweet._json, ensure_ascii=False).encode('utf8'))
                    count += 1
                    # Stop once this query has used its share of the per-day quota.
                    if count == int(perDay / len(queries[index])):
                        break
                except TweepError:
                    # Every TweepError is treated as a rate limit: wait, then retry.
                    print langs[index] + " - rate limit reached! Pausing thread for 15 minutes."
                    sleep(60 * 15)
                    continue
                except StopIteration:
                    # The cursor has no more results for this query.
                    break
            # NOTE(review): outPath is unbound here if no tweet has been stored yet.
            print str(count) + " tweets stored in " + outPath
class Tweet():
    """Small wrapper around a tweepy ``API`` for posting tweets and replies."""

    def __init__(self, auth):
        """Keep ``auth`` and build the API client from it."""
        self.auth = auth
        self.api = API(auth)

    def tweet_with_media(self, fn, status):
        """Post ``status`` with the media file ``fn`` attached."""
        self.api.update_with_media(fn, status=status)

    def update_with_media(self, fn, status, tweet_id):
        """Upload ``fn`` and post ``status`` with it as a reply to ``tweet_id``."""
        media = self.api.media_upload(fn)
        # The parameter is `in_reply_to_status_id`; the previous
        # `reply_to_status_id` is not a recognised parameter, so the tweet
        # was not posted as a reply.
        self.api.update_status(status=status,
                               in_reply_to_status_id=tweet_id,
                               media_ids=[media.media_id])

    def update(self, status, tweet_id):
        """Post ``status`` as a reply to ``tweet_id``."""
        self.api.update_status(status=status, in_reply_to_status_id=tweet_id)
def __init__(self, api=None):
    """Create the Twitter API client and the Google Maps client.

    NOTE(review): the ``api`` argument is accepted but never used; a new
    ``API`` is always built.
    """
    api_options = {
        'retry_count': 10,
        'retry_delay': 30,
        'timeout': 1000,
        'wait_on_rate_limit': True,
    }
    self.api = API(**api_options)

    maps_key = PARAMS['googlemaps_apikey']
    self.gmaps = googlemaps.Client(key=maps_key)
def __init__(self, api=None):
    """Create the Twitter API client with 100 retries and a timeout of 1000.

    NOTE(review): the ``api`` argument is accepted but never used; a new
    ``API`` is always built.
    """
    api_options = {'retry_count': 100, 'timeout': 1000}
    self.api = API(**api_options)
def __init__(self, twitter_id=None, twitter_sn=None, api=None):
    """Store the Twitter id / screen name and the API client to use.

    A default ``API()`` is created when ``api`` is falsy.
    """
    self.api = api if api else API()
    self.twitter_id, self.twitter_sn = twitter_id, twitter_sn
def setUp(self):
    """Build an authenticated API client with retry_count=2 and retry_delay=5."""
    self.auth = create_auth()
    client = API(self.auth)
    client.retry_count = 2
    client.retry_delay = 5
    self.api = client
def __init__(self, dataD, api=None):
    """Store ``dataD`` and the API client, creating ``API()`` when ``api`` is falsy."""
    self.api = api if api else API()
    self.dataD = dataD
def update_tweet(text):
    """Post ``text`` as a status update using the handler from ``get_oauth()``."""
    client = API(get_oauth())
    client.update_status(status=text)
forward = [] backward = [] if __name__ == "__main__": while 1: try: forward = [] backward = [] #outfile = sys.argv[1] #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') auth = tweepy.OAuthHandler("xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM") auth.set_access_token("174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k") twitterApi = API(auth_handler=auth, host='api.twitter.com', search_host='search.twitter.com', cache=FileCache("cache", timeout = -1), secure=False, api_root='/1', search_root='', retry_count=0, retry_delay=0, retry_errors=None, parser=None) #username1, username2,listUsernames = readFile(outfile) user1 = twitterApi.get_user(sys.argv[1]) #@UndefinedVariable user2 = twitterApi.get_user(sys.argv[2]) #@UndefinedVariable forward.append({"obj":user1, "cursor":-1, "friends":[], "cursor_obj":-1, "path":[]}) backward.append({"obj":user2, "cursor":-1, "cursor_obj":-1,"path":[], "followers":[] }) reqs = 0 while 1: fin, path = go_backward() reqs +=1;print reqs if fin: print path;reqs=-2;break while has_node(backward):
def setUp(self):
    """Build an OAuth-authenticated API client with retry_count=2 and retry_delay=5."""
    handler = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)
    handler.set_access_token(oauth_token, oauth_token_secret)

    client = API(handler)
    client.retry_count = 2
    client.retry_delay = 5
    self.api = client
class TweepyAPITests(unittest.TestCase):
    """Tests that call tweepy `API` methods with the configured OAuth credentials.

    Many tests only invoke an endpoint and rely on it not raising; others
    assert on fields of the returned objects. Several tests create and then
    remove data (statuses, direct messages, friendships, blocks, lists,
    saved searches) on the authenticated account.
    """

    def setUp(self):
        # Each test gets its own client, configured with retry_count=2 and retry_delay=5.
        auth = OAuthHandler(oauth_consumer_key, oauth_consumer_secret)
        auth.set_access_token(oauth_token, oauth_token_secret)
        self.api = API(auth)
        self.api.retry_count = 2
        self.api.retry_delay = 5

    def testhometimeline(self):
        self.api.home_timeline()

    def testfriendstimeline(self):
        self.api.friends_timeline()

    def testusertimeline(self):
        # Called once without arguments and once for the 'twitter' account.
        self.api.user_timeline()
        self.api.user_timeline('twitter')

    def testmentions(self):
        self.api.mentions()

    def testretweetedbyme(self):
        self.api.retweeted_by_me()

    def testretweetedbyuser(self):
        self.api.retweeted_by_user('twitter')

    def testretweetedtome(self):
        self.api.retweeted_to_me()

    def testretweetsofme(self):
        self.api.retweets_of_me()

    def testretweet(self):
        # Retweet status 123, then destroy the returned object to clean up.
        s = self.api.retweet(123)
        s.destroy()

    def testretweets(self):
        self.api.retweets(123)

    def testgetstatus(self):
        self.api.get_status(id=123)

    def testupdateanddestroystatus(self):
        # test update
        text = 'testing %i' % random.randint(0, 1000)
        update = self.api.update_status(status=text)
        self.assertEqual(update.text, text)
        # test destroy
        deleted = self.api.destroy_status(id=update.id)
        self.assertEqual(deleted.id, update.id)

    def testgetuser(self):
        # Look the same account up by screen name and by numeric id.
        u = self.api.get_user('twitter')
        self.assertEqual(u.screen_name, 'twitter')
        u = self.api.get_user(783214)
        self.assertEqual(u.screen_name, 'twitter')

    def testsearchusers(self):
        self.api.search_users('twitter')

    def testme(self):
        me = self.api.me()
        self.assertEqual(me.screen_name, username)

    def testfriends(self):
        self.api.friends()

    def testfollowers(self):
        self.api.followers()

    def testdirectmessages(self):
        self.api.direct_messages()

    def testsentdirectmessages(self):
        self.api.sent_direct_messages()

    def testsendanddestroydirectmessage(self):
        # send
        # The message is sent to `username`, which is also the expected sender.
        sent_dm = self.api.send_direct_message(username, text='test message')
        self.assertEqual(sent_dm.text, 'test message')
        self.assertEqual(sent_dm.sender.screen_name, username)
        self.assertEqual(sent_dm.recipient.screen_name, username)
        # destroy
        destroyed_dm = self.api.destroy_direct_message(sent_dm.id)
        self.assertEqual(destroyed_dm.text, sent_dm.text)
        self.assertEqual(destroyed_dm.id, sent_dm.id)
        self.assertEqual(destroyed_dm.sender.screen_name, username)
        self.assertEqual(destroyed_dm.recipient.screen_name, username)

    def testcreatedestroyfriendship(self):
        # Unfollow first, then follow again, checking exists_friendship after each step.
        enemy = self.api.destroy_friendship('twitter')
        self.assertEqual(enemy.screen_name, 'twitter')
        self.assertFalse(self.api.exists_friendship(username, 'twitter'))
        friend = self.api.create_friendship('twitter')
        self.assertEqual(friend.screen_name, 'twitter')
        self.assertTrue(self.api.exists_friendship(username, 'twitter'))

    def testshowfriendship(self):
        # NOTE(review): 'twtiter' looks like a misspelling of 'twitter' -- confirm it is intended.
        source, target = self.api.show_friendship(target_screen_name='twtiter')
        self.assert_(isinstance(source, Friendship))
        self.assert_(isinstance(target, Friendship))

    def testfriendsids(self):
        self.api.friends_ids(username)

    def testfollowersids(self):
        self.api.followers_ids(username)

    def testverifycredentials(self):
        self.assertNotEqual(self.api.verify_credentials(), False)
        # make sure that `me.status.entities` is not an empty dict
        me = self.api.verify_credentials(include_entities=True)
        self.assertTrue(me.status.entities)
        # `status` shouldn't be included
        me = self.api.verify_credentials(skip_status=True)
        self.assertFalse(hasattr(me, 'status'))

    def testratelimitstatus(self):
        self.api.rate_limit_status()

    def testupdateprofilecolors(self):
        original = self.api.me()
        updated = self.api.update_profile_colors(
            '000', '000', '000', '000', '000')
        # restore colors
        # The restore happens before the assertions, so it runs even when one of them fails.
        self.api.update_profile_colors(
            original.profile_background_color,
            original.profile_text_color,
            original.profile_link_color,
            original.profile_sidebar_fill_color,
            original.profile_sidebar_border_color
        )
        self.assertEqual(updated.profile_background_color, '000')
        self.assertEqual(updated.profile_text_color, '000')
        self.assertEqual(updated.profile_link_color, '000')
        self.assertEqual(updated.profile_sidebar_fill_color, '000')
        self.assertEqual(updated.profile_sidebar_border_color, '000')

    # The string literal below holds two disabled tests; it is never executed.
    """ def testupateprofileimage(self): self.api.update_profile_image('examples/profile.png') def testupdateprofilebg(self): self.api.update_profile_background_image('examples/bg.png') """

    def testupdateprofile(self):
        original = self.api.me()
        profile = {
            'name': 'Tweepy test 123',
            'url': 'http://www.example.com',
            'location': 'pytopia',
            'description': 'just testing things out'
        }
        updated = self.api.update_profile(**profile)
        # Put the original profile values back before checking the update.
        self.api.update_profile(
            name=original.name,
            url=original.url,
            location=original.location,
            description=original.description
        )
        for k, v in profile.items():
            if k == 'email':
                continue
            self.assertEqual(getattr(updated, k), v)

    def testfavorites(self):
        self.api.favorites()

    def testcreatedestroyfavorite(self):
        self.api.create_favorite(4901062372)
        self.api.destroy_favorite(4901062372)

    def testenabledisablenotifications(self):
        self.api.enable_notifications('twitter')
        self.api.disable_notifications('twitter')

    def testcreatedestroyblock(self):
        self.api.create_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), True)
        self.api.destroy_block('twitter')
        self.assertEqual(self.api.exists_block('twitter'), False)
        self.api.create_friendship('twitter')  # restore

    def testblocks(self):
        self.api.blocks()

    def testblocksids(self):
        self.api.blocks_ids()

    def testcreateupdatedestroylist(self):
        self.api.create_list('tweeps')
        # XXX: right now twitter throws a 500 here,
        # issue is being looked into by twitter.
        # self.api.update_list('tweeps', mode='private')
        self.api.destroy_list('tweeps')

    def testlists(self):
        self.api.lists()

    def testlistsmemberships(self):
        self.api.lists_memberships()

    def testlistssubscriptions(self):
        self.api.lists_subscriptions()

    def testlisttimeline(self):
        self.api.list_timeline('applepie', 'stars')

    def testgetlist(self):
        self.api.get_list('applepie', 'stars')

    def testlistmembers(self):
        self.api.list_members('applepie', 'stars')

    def testislistmember(self):
        uid = self.api.get_user('applepie').id
        self.api.is_list_member('applepie', 'stars', uid)

    def testsubscribeunsubscribelist(self):
        self.api.subscribe_list('applepie', 'stars')
        self.api.unsubscribe_list('applepie', 'stars')

    def testlistsubscribers(self):
        self.api.list_subscribers('applepie', 'stars')

    def testissubscribedlist(self):
        uid = self.api.get_user('applepie').id
        self.api.is_subscribed_list('applepie', 'stars', uid)

    def testsavedsearches(self):
        # Create a saved search, read it back by id, then delete it.
        s = self.api.create_saved_search('test')
        self.api.saved_searches()
        self.assertEqual(self.api.get_saved_search(s.id).query, 'test')
        self.api.destroy_saved_search(s.id)

    def testsearch(self):
        self.api.search('tweepy')

    def testtrends(self):
        self.api.trends_daily()
        self.api.trends_weekly()

    def testgeoapis(self):
        self.api.geo_id(id='c3f37afa9efcf94b')  # Austin, TX, USA
        self.api.nearby_places(lat=30.267370168467806, long=-97.74261474609375)  # Austin, TX, USA
        self.api.reverse_geocode(lat=30.267370168467806, long=-97.74261474609375)  # Austin, TX, USA
def forecast(city):
    """Tweet whether it is raining in ``city``, with temperature and forecast.

    Reads the ``city`` section of ``settings.cfg`` for the WOEID and the
    Twitter credentials, fetches the Yahoo weather feed for that WOEID,
    builds a short randomised answer from the current condition code, purges
    the task queue named ``city``, logs the answer and posts it as a status
    update.
    """
    config = ConfigParser.RawConfigParser()
    config.read('settings.cfg')
    WOEID = config.get(city, 'WOEID')

    baseurl = "https://query.yahooapis.com/v1/public/yql?"
    yql_query = ("select * from weather.forecast where woeid=" + WOEID)
    yql_url = baseurl + urllib.urlencode({'q': yql_query}) + "&format=xml"
    forecastfile = urllib.urlopen(yql_url)
    try:
        # ET.parse reads the whole response, so the handle can be closed
        # right away, and is closed even when parsing fails.
        tree = ET.parse(forecastfile)
    finally:
        forecastfile.close()

    # Positional walk through the response document.
    query = tree.getroot()
    root = query[0]
    channel = root[0]
    item = channel[12]
    description = item[5]
    forecast = item[7]

    high = forecast.attrib['high']
    low = forecast.attrib['low']
    forecastText = forecast.attrib['text']
    currentTemp = description.attrib['temp']
    currentText = description.attrib['text']
    currentCondition = int(description.attrib['code'])
    timeStamp = description.attrib['date']

    rainCodes = [
        1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 18, 35, 37, 38, 39, 40, 47, 45, 46
    ]
    fairCodes = [31, 32, 33, 34]
    overcastCodes = [26, 27, 28]
    snowCodes = [13, 14, 15, 16, 41, 42, 43]
    uniqueCodes = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 17, 18, 19, 20, 21, 22, 23, 24, 35, 37,
        38, 39, 40, 44, 45, 46, 47
    ]
    blankCodes = [9, 11, 12, 25, 29, 30, 36, 3200]

    # Yes/no part of the answer.
    if currentCondition in rainCodes:
        yes_choices = [
            'Yes', 'Yes', 'Yea', 'Yep', 'Ya', 'Grab an umbrella',
            "It's raining"
        ]
        a = random.choice(yes_choices)
    else:
        no_choices = ['No', 'Nah', 'Nope', 'Not raining', 'Not raining']
        a = random.choice(no_choices)

    # Optional remark. The five code lists do not overlap, so at most one
    # branch applies; a code in none of them now yields no remark instead of
    # failing on an unbound `comment`.
    comment = ''
    if currentCondition in fairCodes:
        fair_choices = [
            ", beautiful day", ", clear day", ", nice day", ", fair weather",
            ""
        ]
        comment = random.choice(fair_choices)
    elif currentCondition in overcastCodes:
        overcast_choices = [
            ", gloomy", ", cloudy", ", overcast", ", grey skies", ""
        ]
        comment = random.choice(overcast_choices)
    elif currentCondition in snowCodes:
        snow_choices = [
            ", snowing", ", snow", ", snowfall", ", snow coming down"
        ]
        comment = random.choice(snow_choices)
    elif currentCondition in uniqueCodes:
        uniqueChoice = str(", " + currentText)
        unique_choices = [uniqueChoice, ""]
        comment = random.choice(unique_choices)
    elif currentCondition in blankCodes:
        comment = str('')

    # Report tonight's low when the observation time contains 'PM',
    # otherwise today's high.
    if 'PM' in timeStamp:
        timeStamp = "w/ low tonight of "
        tempHL = low
    else:
        timeStamp = "w/ high today of "
        tempHL = high

    q = taskqueue.Queue(city)
    q.purge()

    a = a.rstrip("\r\n")
    comment = comment.rstrip("\r\n")
    comment = comment.lower()
    forecastText = forecastText.lower()
    answer = (a + comment + '.\n' + currentTemp + '° now ' + timeStamp +
              tempHL + '°\n' + "Forecast: " + forecastText + '.')
    logging.info(answer)

    CONSUMER_KEY = config.get(city, 'CONSUMER_KEY')
    CONSUMER_SECRET = config.get(city, 'CONSUMER_SECRET')
    ACCESS_TOKEN = config.get(city, 'ACCESS_TOKEN')
    ACCESS_TOKEN_SECRET = config.get(city, 'ACCESS_TOKEN_SECRET')
    auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    api = API(auth)
    api.update_status(status=answer)
class Twitter(ApiInterface):
    """Twitter implementation of ``ApiInterface`` built on a tweepy ``API``.

    Besides posting and deleting tweets it can stream tweets for hashtags
    into a csv file, score their text with TextBlob and plot the scores.
    """

    def __init__(self) -> None:
        """Authenticate with the stored keys and set up helpers and file paths."""
        # Store easy access for keys
        self.keys = TwitterKey()

        # Store pointer for OAuth access
        auth = OAuthHandler(self.keys.consumer_pub, self.keys.consumer_sec)
        auth.set_access_token(self.keys.access_pub, self.keys.access_sec)
        self.auth = auth
        self.api = API(auth)

        # Store easy access for twitter info operations
        self.info = TwitterInfo(self.api)
        self.bio = TwitterBio(self.api)

        # Contains info for real-time graphing
        self.streamfile = os.path.join('postr', 'twitter', 'twitter_stream.txt')
        self.graphfile = os.path.join('postr', 'twitter', 'twitter_graphing.csv')
        self.blobfile = os.path.join('postr', 'twitter', 'twitter_blob.csv')

    def post_text(self, text: str) -> bool:
        """Post a tweet containing ``text``; return True on success, False on error."""
        try:
            self.api.update_status(status=text)
            return True
        except Exception as e:
            # Exception (not BaseException) so Ctrl-C and SystemExit propagate.
            print(e)
            return False

    # pylint: disable=no-self-use, unused-argument
    def post_video(self, url: str, text: str) -> bool:
        """Not applicable; always returns False."""
        return False

    def post_photo(self, url: str, text: str) -> bool:
        """Post a tweet with ``text`` and the picture file at ``url``.

        Returns True on success, False on error.
        """
        try:
            self.api.update_with_media(filename=url, status=text)
            return True
        except Exception as e:
            print(e)
            return False

    def get_user_followers(self, text: str) -> List[str]:
        """Return the screen names of the followers of the user ``text``.

        Note: this is rate limited; the loop pauses one second after every
        100 followers.
        """
        my_followers = []
        # Use the cursor module for pagination
        followers = Cursor(self.api.followers, screen_name=text).items()
        for seen, follower in enumerate(followers, start=1):
            my_followers.append(follower.screen_name)
            # Simple rate limit for requests
            if seen % 100 == 0:
                time.sleep(1)
        return my_followers

    def remove_post(self, post_id: str) -> bool:
        """Remove the tweet ``post_id``; return True on success, False on error."""
        try:
            self.api.destroy_status(post_id)
            return True
        except Exception as e:
            print(e)
            return False

    def stream_tweets(self, hashtags: List[str], output_filename: str) -> None:
        """Stream tweets for ``hashtags``, writing data into ``output_filename``.

        The graphing csv is re-initialised before streaming starts.
        """
        self.setup_csv()
        twitter_streamer = TwitterStreamer(self.keys, self.graphfile)
        twitter_streamer.stream_tweets(hashtags, output_filename, self.auth)
        print('done streaming')

    def setup_csv(self) -> None:
        """Initialise the graphing csv file with its 'Tweet', 'Time' header."""
        csvData = ['Tweet', 'Time']
        # newline='' is what the csv module asks for; without it the writer
        # emits extra carriage returns on platforms that translate newlines.
        with open(self.graphfile, 'w', newline='') as csvFile:
            csv.writer(csvFile).writerow(csvData)

    # pylint: disable=no-self-use, unused-argument
    def get_user_likes(self) -> int:
        """Not applicable, see helper methods in TwitterInfo class; returns -1."""
        return -1

    def read_csv_col(self, colNum: int, filename: str) -> List[str]:
        """Return column ``colNum`` of csv ``filename`` as strings, without the header row."""
        with open(filename, 'r', newline='') as rf:
            reader = csv.reader(rf, delimiter=',')
            col = [str(row[colNum]) for row in reader]
        return col[1:]  # Ignore the csv header

    def analyzeSentiment(self) -> None:
        """Write one (time, polarity score) row per streamed tweet to the blob file.

        Tweet text is reduced to ASCII letters and spaces before scoring.
        """
        with open(self.blobfile, 'w', newline='') as bf:
            writer = csv.writer(bf)
            graph_data = zip(
                self.read_csv_col(0, self.graphfile),
                self.read_csv_col(1, self.graphfile),
            )
            for tweet, timestamp in graph_data:
                text = str(re.sub(r'[^a-zA-Z ]+', '', tweet))
                score = Twitter.polarity(text)
                writer.writerow([timestamp, score])

    @staticmethod
    def polarity(text: str) -> float:
        """Return the TextBlob sentiment polarity of ``text`` as a float.

        Made into a separate method to provide easy modification if needed
        in the future.
        """
        return float(TextBlob(text).sentiment.polarity)

    def stream_and_graph(self, hashtags: List[str]) -> None:
        """Stream tweets for ``hashtags``, score their sentiment, then plot it."""
        self.stream_tweets(hashtags, self.streamfile)
        self.analyzeSentiment()
        self.graph_blob()

    def graph_blob(self) -> None:
        """Plot the scores in the blob file against their timestamps."""
        dates = self.read_csv_col(0, self.blobfile)
        # Truncate the datetime strings to a fixed precision
        dates = [d[:DATETIME_MILLISECOND_PRECISION] for d in dates]
        # Truncate off scores past a precision for easy viewing on the plot
        # NOTE(review): scores stay strings here, so they are plotted as
        # text values -- confirm this is intended.
        scores = [
            score[:SCORE_PRECISION]
            for score in self.read_csv_col(1, self.blobfile)
        ]

        plt.plot(
            dates,
            scores,
        )
        plt.ylabel('Positivity Score')
        plt.xlabel('Time')
        # beautify the x-labels
        plt.gcf().autofmt_xdate()
        plt.show()

    def update_bio(self, message: str) -> None:
        """Set the authenticated user's bio to ``message``."""
        self.api.update_profile(description=message)
def get(self):
    """Handle the OAuth callback: exchange the verifier, upsert the user, redirect.

    When an `oauth_verifier` is present, the request token stored in the
    session is exchanged for an access token. If no stored user matches that
    token, or the stored user is more than 86400 seconds old, the profile is
    fetched from Twitter, the avatar is copied to Cloud Storage and the user
    entity is created or updated. Finally the browser is redirected to the
    session's "next" URL, or to its "referer" when "next" is not set.

    NOTE(review): the original indentation was lost; the nesting below is
    reconstructed from the statements themselves and should be confirmed.
    """
    verifier = self.request.get("oauth_verifier")
    if verifier:
        # Get access token
        handler = auth.OAuthHandler(config.CONSUMER_KEY, config.CONSUMER_SECRET)
        handler.set_request_token(self.session.get("request_token_key"), self.session.get("request_token_secret"))
        access_token = handler.get_access_token(verifier)
        if access_token:
            # Get user
            # NOTE(review): this writes the access token to the log -- confirm that is acceptable.
            logging.info("Access token: %s" %(access_token))
            user = User.all().filter("twitter_access_token_key", access_token.key).get()
            # Refresh when the user is unknown or was last updated more than 86400 seconds ago.
            if((not user) or (user and user.updated < datetime.now() - timedelta(0,86400))):
                logging.info("Connecting to the Twitter API")
                api = API(handler)
                temp_user = api.verify_credentials()
                # Removing "_normal" from the URL presumably selects the full-size avatar -- TODO confirm.
                temp_image = urlfetch.Fetch(str(temp_user.profile_image_url).replace("_normal", "")).content
                # Transform image into .PNG
                image_manager = images.Image(image_data=temp_image)
                image_manager.rotate(360)
                temp_png = image_manager.execute_transforms()
                logging.info("Encoded into .PNG")
                # Save or update image in Cloud storage
                filename = config.FOLDER + "/" + str(temp_user.id)
                gcs_file = gcs.open(filename,'w',content_type="image/png",options={"x-goog-acl":"public-read"})
                gcs_file.write(temp_png)
                gcs_file.close()
                logging.info("Image saved to Google Cloud Storage")
                # Get avatar
                blob_filename = "/gs" + filename
                blobkey = blobstore.create_gs_key(blob_filename)
                temp_avatar = str(images.get_serving_url(blobkey))
                if not user:
                    logging.info("User did not exist")
                    user = User(
                        twitter_id = str(temp_user.id),
                        twitter_access_token_key = str(access_token.key),
                        twitter_access_token_secret = str(access_token.secret),
                        username = str(temp_user.screen_name).lower(),
                        name = temp_user.name,
                        bio = temp_user.description,
                        avatar = temp_avatar,
                    )
                else:
                    logging.info("User had to be updated")
                    user.twitter_id = str(temp_user.id)
                    user.twitter_access_token_key = str(access_token.key)
                    user.twitter_access_token_secret = str(access_token.secret)
                    user.username = str(temp_user.screen_name).lower()
                    user.name = temp_user.name
                    user.bio = temp_user.description
                    user.avatar = temp_avatar
                user.put()
                logging.info("User @%s saved in datastore"%(user.username))
            # Save user in session
            self.session["id"] = user.key().id()
        else:
            logging.error("No access token from Twitter")
            print "Error"
    else:
        logging.error("No verifier")
        print "Error"
    # Redirect users to the page they came from or the page they're supposed to head to
    next = self.session.get("next")
    redirect = self.session.get("referer")
    if next:
        redirect = next
    self.redirect(str(redirect))
def __init__(self, api=None):
    """Store the API client (``API()`` when ``api`` is falsy) and reset the counters.

    ``n`` starts at 0 and ``m`` is set to ``MAX_RECORDS_TO_PROCESS``.
    """
    self.api = api if api else API()
    self.n, self.m = 0, MAX_RECORDS_TO_PROCESS
def on_connect():
    """Post the result of ``mock_tweet()`` as a status update using ``self.auth``."""
    client = API(self.auth)
    client.update_status(mock_tweet())
# Tweepy # Copyright 2009-2010 Joshua Roesslein # See LICENSE for details. """ Tweepy Twitter API library """ __version__ = '3.0' __author__ = 'Joshua Roesslein' __license__ = 'MIT' from tweepy.models import Status, User, DirectMessage, Friendship, SavedSearch, SearchResults, ModelFactory, Category from tweepy.error import TweepError from tweepy.api import API from tweepy.cache import Cache, MemoryCache, FileCache from tweepy.auth import OAuthHandler, AppAuthHandler from tweepy.streaming import Stream, StreamListener from tweepy.cursor import Cursor # Global, unauthenticated instance of API api = API() def debug(enable=True, level=1): import httplib httplib.HTTPConnection.debuglevel = level
forward = [] backward = [] #outfile = sys.argv[1] #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') auth = tweepy.OAuthHandler( "xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM") auth.set_access_token( "174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k") twitterApi = API(auth_handler=auth, host='api.twitter.com', search_host='search.twitter.com', cache=FileCache("cache", timeout=-1), secure=False, api_root='/1', search_root='', retry_count=0, retry_delay=0, retry_errors=None, parser=None) #username1, username2,listUsernames = readFile(outfile) user1 = twitterApi.get_user(sys.argv[1]) #@UndefinedVariable user2 = twitterApi.get_user(sys.argv[2]) #@UndefinedVariable forward.append({ "obj": user1, "cursor": -1, "friends": [], "cursor_obj": -1,
def setUp(self):
    """Create the auth handler and an API client that uses it."""
    handler = create_auth()
    self.auth = handler
    self.api = API(handler)
def __init__(self, auth):
    """Keep ``auth`` and build the API client from it."""
    self.auth, self.api = auth, API(auth)
def __init__(self, api=None):
    """Store the API client (``API()`` when ``api`` is falsy) and the 'Discord' settings.

    ``data_d`` is the 'Discord' entry of the module-level ``data_json``.
    """
    self.api = api if api else API()
    discord_section = data_json['Discord']
    self.data_d = discord_section
def go(collection_type, project_id, collector_id, rawdir, logdir): if collection_type not in ['track', 'follow', 'none']: print("ThreadedCollector accepts inputs 'track', 'follow', or 'none'.") print('Exiting with invalid params...') sys.exit() else: # Grab collector & project details from DB project = db.get_project_detail(project_id) resp = db.get_collector_detail(project_id, collector_id) if project['status'] and resp['status']: collector = resp['collector'] configdb = project['project_config_db'] project_config_db = db.connection[configdb] project_config_db = project_config_db.config collector_name = collector['collector_name'] project_name = project['project_name'] else: 'Invalid project account & collector. Try again!' # module_config = project_config_db.find_one({'module': 'twitter'}) # Reference for controller if script is active or not. project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': { 'active': 1 }}) Config = configparser.ConfigParser() Config.read(PLATFORM_CONFIG_FILE) # Creates logger w/ level INFO logger = logging.getLogger(collector_name) logger.setLevel(logging.INFO) # Creates rotating file handler w/ level INFO fh = logging.handlers.TimedRotatingFileHandler( logdir + '/' + project_name + '-' + collector_name + '-' + collection_type + '-collector-log-' + collector_id + '.out', 'D', 1, 30, None, False, False) fh.setLevel(logging.INFO) # Creates formatter and applies to rotating handler format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' datefmt = '%m-%d %H:%M' formatter = logging.Formatter(format, datefmt) fh.setFormatter(formatter) # Finishes by adding the rotating, formatted handler logger.addHandler(fh) # Sets current date as starting point tmpDate = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') logger.info('Starting collection system at %s' % tmpDate) logger.info('Collector name: %s' % collector_name) # Grabs tweets out file info from config # TODO - move this info to Mongo tweetsOutFilePath = rawdir + '/' if 
not os.path.exists(tweetsOutFilePath): os.makedirs(tweetsOutFilePath) tweetsOutFileDateFrmt = Config.get('files', 'tweets_file_date_frmt', 0) tweetsOutFile = Config.get('files', 'tweets_file', 0) # NOTE - proper naming for api_auth dictionary from front_end oauth_info = collector['api_auth'] consumerKey = oauth_info['consumer_key'] consumerSecret = oauth_info['consumer_secret'] accessToken = oauth_info['access_token'] accessTokenSecret = oauth_info['access_token_secret'] # Authenticates via app info auth = OAuthHandler(consumerKey, consumerSecret) auth.set_access_token(accessToken, accessTokenSecret) # Sets Mongo collection; sets rate_limitng & error counts to 0 if 'stream_limit_loss' not in collector: project_config_db.update( {'_id': ObjectId(collector_id)}, {'$set': { 'stream_limit_loss': { 'counts': [], 'total': 0 } }}) if 'rate_limit_count' not in collector: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': { 'rate_limit_count': 0 }}) if 'error_code' not in collector: project_config_db.update({"_id": ObjectId(collector_id)}, {'$set': { 'error_code': 0 }}) runCollector = collector['collector']['run'] if runCollector: print('Starting process w/ start signal %d' % runCollector) logger.info('Starting process w/ start signal %d' % runCollector) collectingData = False i = 0 myThreadCounter = 0 runLoopSleep = 0 while runCollector: i += 1 # Finds Mongo collection & grabs signal info # If Mongo is offline throws an acception and continues exception = None try: resp = db.get_collector_detail(project_id, collector_id) collector = resp['collector'] flags = collector['collector'] runCollector = flags['run'] collectSignal = flags['collect'] updateSignal = flags['update'] except Exception as exception: logger.info('Mongo connection refused with exception: %s' % exception) """ Collection process is running, and: A) An update has been triggered -OR- B) The collection signal is not set -OR- C) Run signal is not set """ if collectingData and (updateSignal or not 
collectSignal or not runCollector): # Update has been triggered if updateSignal: logger.info( 'MAIN: received UPDATE signal. Attempting to stop collection thread' ) resp = db.set_collector_status(project_id, collector_id, collector_status=1) # Collection thread triggered to stop if not collectSignal: logger.info( 'MAIN: received STOP signal. Attempting to stop collection thread' ) # Entire process trigerred to stop if not runCollector: logger.info( 'MAIN: received EXIT signal. Attempting to stop collection thread' ) resp = db.set_collector_status(project_id, collector_id, collector_status=0) collectSignal = 0 # Send stream disconnect signal, kills thread stream.disconnect() wait_count = 0 while e.isSet() is False: wait_count += 1 print('%d) Waiting on collection thread shutdown' % wait_count) sleep(wait_count) collectingData = False logger.info('COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count) logger.info('COLLECTION THREAD: collected %d error tweets' % l.delete_count) print('COLLECTION THREAD: collected %d error tweets' % l.delete_count) logger.info( 'COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count) print('COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count) print('COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count) if not l.error_code == 0: resp = db.set_collector_status(project_id, collector_id, collector_status=0) project_config_db.update( {"_id": ObjectId(collector_id)}, {'$set': { 'error_code': l.error_code }}) if not l.limit_count == 0: project_config_db.update( {'_id': ObjectId(collector_id)}, {'$set': { 'stream_limit_loss.total': l.limit_count }}) if not l.rate_limit_count == 0: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': { 'rate_limit_count': 0 }}) # Collection has been signaled & main program thread is running # TODO - Check Mongo for handle:ID pairs # Only call for new pairs if collectSignal and (threading.activeCount() == 1): # Names collection thread 
& adds to counter myThreadCounter += 1 myThreadName = 'collector-' + collection_type + '%s' % myThreadCounter termsList = collector['terms_list'] if termsList: print('Terms list length: ' + str(len(termsList))) # Grab IDs for follow stream if collection_type == 'follow': """ TODO - Update Mongo terms w/ set for collect status 0 or 1 # Updates current stored handles to collect 0 if no longer listed in terms file stored_terms = doc['termsList'] for user in stored_terms: if user['handle'] not in termsList: user_id = user['id'] mongo_config.update({'module': 'collector-follow'}, {'$pull': {'termsList': {'handle': user['handle']}}}) mongo_config.update({'module': 'collecting-follow'}, {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}}) # Loops thru current stored handles and adds list if both: # A) Value isn't set to None (not valid OR no longer in use) all_stored_handles = [user['handle'] for user in stored_terms] stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']] print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles) """ # Loop thru & query (except handles that have been stored) print('MAIN: Querying Twitter API for handle:id pairs...') logger.info( 'MAIN: Querying Twitter API for handle:id pairs...') # Initiates REST API connection twitter_api = API(auth_handler=auth) failed_handles = [] success_handles = [] # Loops thru user-given terms list for item in termsList: term = item['term'] # If term already has a valid ID, pass if item['id'] is not None: pass # Queries the Twitter API for the ID value of the handle else: try: user = twitter_api.get_user(screen_name=term) except TweepError as tweepy_exception: error_message = tweepy_exception.args[0][0][ 'message'] code = tweepy_exception.args[0][0]['code'] # Rate limited for 15 minutes w/ code 88 if code == 88: print( 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.' 
) logger.exception( 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.' ) time.sleep(900) # Handle doesn't exist, added to Mongo as None elif code == 34: print( 'MAIN: User w/ handle %s does not exist.' % term) logger.exception( 'MAIN: User w/ handle %s does not exist.' % term) item['collect'] = 0 item['id'] = None failed_handles.append(term) # Success - handle:ID pair stored in Mongo else: user_id = user._json['id_str'] item['id'] = user_id success_handles.append(term) print('MAIN: Collected %d new ids for follow stream.' % len(success_handles)) logger.info( 'MAIN: Collected %d new ids for follow stream.' % len(success_handles)) print('MAIN: %d handles failed to be found.' % len(failed_handles)) logger.info('MAIN: %d handles failed to be found.' % len(failed_handles)) logger.info(failed_handles) print(failed_handles) print( 'MAIN: Grabbing full list of follow stream IDs from Mongo.' ) logger.info( 'MAIN: Grabbing full list of follow stream IDs from Mongo.' ) # Updates term list with follow values project_config_db.update( {'_id': ObjectId(collector_id)}, {'$set': { 'terms_list': termsList }}) # Loops thru current stored handles and adds to list if: # A) Value isn't set to None (not valid OR no longer in use) ids = [ item['id'] for item in termsList if item['id'] and item['collect'] ] noncoll = [ item['term'] for item in termsList if not item['collect'] ] termsList = ids else: terms = [ item['term'] for item in termsList if item['collect'] ] noncoll = [ item['term'] for item in termsList if not item['collect'] ] termsList = terms print('Terms List: ') print(termsList) print('') print('Not collecting for: ') print(noncoll) print('') logger.info('Terms list: %s' % str(termsList).strip('[]')) logger.info('Not collecting for: %s' % str(noncoll).strip('[]')) print( 'COLLECTION THREAD: Initializing Tweepy listener instance...') logger.info( 'COLLECTION THREAD: Initializing Tweepy listener instance...') l = fileOutListener(tweetsOutFilePath, tweetsOutFileDateFrmt, 
tweetsOutFile, logger, collection_type, project_id, collector_id) print('TOOLKIT STREAM: Initializing Tweepy stream listener...') logger.info( 'TOOLKIT STREAM: Initializing Tweepy stream listener...') # Initiates async stream via Tweepy, which handles the threading # TODO - location & language languages = collector['languages'] location = collector['location'] if languages: print('%s language codes found!' % len(languages)) if location: print('Location points found!') for i in range(len(location)): location[i] = float(location[i]) stream = ToolkitStream(auth, l, logger, project_id, collector_id, retry_count=100) if collection_type == 'track': stream.filter(track=termsList, languages=languages, locations=location, is_async=True) elif collection_type == 'follow': stream.filter(follow=termsList, languages=languages, locations=location, is_async=True) elif collection_type == 'none': stream.filter(locations=location, languages=languages, is_async=True) else: sys.exit('ERROR: Unrecognized stream filter.') collectingData = True print('MAIN: Collection thread started (%s)' % myThreadName) logger.info('MAIN: Collection thread started (%s)' % myThreadName) # if threading.activeCount() == 1: # print "MAIN: %d iteration with no collection thread running" % i # else: # print "MAIN: %d iteration with collection thread running (%d)" % (i, threading.activeCount()) # Incrementally delays loop if Mongo is offline, otherwise 2 seconds max_sleep_time = 1800 if exception: if runLoopSleep < max_sleep_time: runLoopSleep += 2 else: runLoopSleep = max_sleep_time print("Exception caught, sleeping for: %d" % runLoopSleep) time.sleep(runLoopSleep) else: time.sleep(2) logger.info('Exiting Collection Program...') print('Exiting Collection Program...') # Reference for controller if script is active or not. project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': { 'active': 0 }})
def __init__(self, auth):
    """Keep the auth handler and build a Tweepy ``API`` client from it.

    :param auth: authentication handler; stored on ``self.auth`` and
        passed unchanged to ``API``.
    """
    self.auth = auth
    client = API(self.auth)
    self.api = client
'Please specify the key via environment var or in source code') input_data_path = err_for_input() outfile = 'outfile.csv' pd_df = pandas.read_csv(input_data_path) print(pd_df.dtypes) e = os.environ auth = tweepy.auth.OAuthHandler(consumer_key=e.get('CONSUMER_KEY', err_for_key()), consumer_secret=e.get('CONSUMER_SECRET', err_for_key())) client = API(auth_handler=auth) def get_chunks_of_n(n): # If I was doing this again i would have used more_itertools.chunked, but this is already written counter = count() rows = pd_df.iterrows() res = [] count_val = 0 while count_val < len(pd_df): try: count_val = next(counter) if count_val > 0 and count_val % n == 0: yield res counter = count() res = []
logger.info('Streamに接続しました') return def on_disconnect(self, notice): logger.info('Streamから切断されました:' + str(notice.code)) return def on_limit(self, track): logger.warning('受信リミットが発生しました:' + str(track)) return def on_timeout(self): logger.info('タイムアウト') return True def on_warning(self, notice): logger.warning('警告メッセージ:' + str(notice.message)) return def on_exception(self, exception): logger.error('例外エラー:' + str(exception)) return True # main if __name__ == '__main__': auth = get_oauth() api = API(auth) stream = Stream(auth, Listener(), secure=True) stream.userstream()
def __init__(self, streambot, api=None):
    """Set up the listener state.

    :param streambot: owning bot object; kept so its methods can be
        called from this instance.
    :param api: optional Tweepy ``API`` client; a default ``API()`` is
        created when the argument is missing or falsy.
    """
    if not api:
        api = API()
    self.api = api
    # needed ref to streambot so method can be called
    self.streambot = streambot
    # NOTE(review): presumably the bot's own Twitter user id - confirm.
    self.tw_bot_id = 841013993602863104
    self.ignored_users = []
def __init__(self, api=None):
    """Store the API client and initialise the two counters.

    :param api: optional Tweepy ``API`` client; a default ``API()`` is
        created when the argument is missing or falsy.
    """
    if not api:
        api = API()
    self.api = api
    # Start values for the instance counters ``n`` and ``m``.
    self.n = 0
    self.m = 20
{'$pull': {'termsList': {'handle': user['handle']}}}) mongo_config.update({'module': 'collecting-follow'}, {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}}) # Loops thru current stored handles and adds list if both: # A) Value isn't set to None (not valid OR no longer in use) all_stored_handles = [user['handle'] for user in stored_terms] stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']] print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles) # Loop thru & query (except handles that have been stored) print 'MAIN: Querying Twitter API for new handle:id pairs...' logger.info('MAIN: Querying Twitter API for new handle:id pairs...') # Initiates REST API connection twitter_api = API(auth_handler=auth) failed_handles = [] success_handles = [] # Loops thru user-given terms list for handle in termsList: # If handle already stored, no need to query for ID if handle in stored_handles: pass # Queries the Twitter API for the ID value of the handle else: try: user = twitter_api.get_user(screen_name=handle) except TweepError as tweepy_exception: error_message = tweepy_exception.args[0][0]['message'] code = tweepy_exception.args[0][0]['code'] # Rate limited for 15 minutes w/ code 88
def go(collection_type, project_id, collector_id, rawdir, logdir): if collection_type not in ['track', 'follow', 'none']: print "ThreadedCollector accepts inputs 'track', 'follow', or 'none'." print 'Exiting with invalid params...' sys.exit() else: # Grab collector & project details from DB project = db.get_project_detail(project_id) resp = db.get_collector_detail(project_id, collector_id) if project['status'] and resp['status']: collector = resp['collector'] configdb = project['project_config_db'] project_config_db = db.connection[configdb] project_config_db = project_config_db.config collector_name = collector['collector_name'] project_name = project['project_name'] else: 'Invalid project account & collector. Try again!' # module_config = project_config_db.find_one({'module': 'twitter'}) # Reference for controller if script is active or not. project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'active': 1}}) Config = ConfigParser.ConfigParser() Config.read(PLATFORM_CONFIG_FILE) # Creates logger w/ level INFO logger = logging.getLogger(collector_name) logger.setLevel(logging.INFO) # Creates rotating file handler w/ level INFO fh = logging.handlers.TimedRotatingFileHandler(logdir + '/' + project_name + '-' + collector_name + '-' + collection_type + '-collector-log-' + collector_id + '.out', 'D', 1, 30, None, False, False) fh.setLevel(logging.INFO) # Creates formatter and applies to rotating handler format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' datefmt = '%m-%d %H:%M' formatter = logging.Formatter(format, datefmt) fh.setFormatter(formatter) # Finishes by adding the rotating, formatted handler logger.addHandler(fh) # Sets current date as starting point tmpDate = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') logger.info('Starting collection system at %s' % tmpDate) logger.info('Collector name: %s' % collector_name) # Grabs tweets out file info from config # TODO - move this info to Mongo tweetsOutFilePath = rawdir + '/' if not 
os.path.exists(tweetsOutFilePath): os.makedirs(tweetsOutFilePath) tweetsOutFileDateFrmt = Config.get('files', 'tweets_file_date_frmt', 0) tweetsOutFile = Config.get('files', 'tweets_file', 0) # NOTE - proper naming for api_auth dictionary from front_end oauth_info = collector['api_auth'] consumerKey = oauth_info['consumer_key'] consumerSecret = oauth_info['consumer_secret'] accessToken = oauth_info['access_token'] accessTokenSecret = oauth_info['access_token_secret'] # Authenticates via app info auth = OAuthHandler(consumerKey, consumerSecret) auth.set_access_token(accessToken, accessTokenSecret) # Sets Mongo collection; sets rate_limitng & error counts to 0 if 'stream_limit_loss' not in collector: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set' : { 'stream_limit_loss': { 'counts': [], 'total': 0 }}}) if 'rate_limit_count' not in collector: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'rate_limit_count': 0}}) if 'error_code' not in collector: project_config_db.update({"_id" : ObjectId(collector_id)}, {'$set' : {'error_code': 0}}) runCollector = collector['collector']['run'] if runCollector: print 'Starting process w/ start signal %d' % runCollector logger.info('Starting process w/ start signal %d' % runCollector) collectingData = False i = 0 myThreadCounter = 0 runLoopSleep = 0 while runCollector: i += 1 # Finds Mongo collection & grabs signal info # If Mongo is offline throws an acception and continues exception = None try: resp = db.get_collector_detail(project_id, collector_id) collector = resp['collector'] flags = collector['collector'] runCollector = flags['run'] collectSignal = flags['collect'] updateSignal = flags['update'] except Exception, exception: logger.info('Mongo connection refused with exception: %s' % exception) """ Collection process is running, and: A) An update has been triggered -OR- B) The collection signal is not set -OR- C) Run signal is not set """ if collectingData and (updateSignal or not 
collectSignal or not runCollector): # Update has been triggered if updateSignal: logger.info('MAIN: received UPDATE signal. Attempting to stop collection thread') resp = db.set_collector_status(project_id, collector_id, collector_status=1) # Collection thread triggered to stop if not collectSignal: logger.info('MAIN: received STOP signal. Attempting to stop collection thread') # Entire process trigerred to stop if not runCollector: logger.info('MAIN: received EXIT signal. Attempting to stop collection thread') resp = db.set_collector_status(project_id, collector_id, collector_status=0) collectSignal = 0 # Send stream disconnect signal, kills thread stream.disconnect() wait_count = 0 while e.isSet() is False: wait_count += 1 print '%d) Waiting on collection thread shutdown' % wait_count sleep(wait_count) collectingData = False logger.info('COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count) logger.info('COLLECTION THREAD: collected %d error tweets' % l.delete_count) print 'COLLECTION THREAD: collected %d error tweets' % l.delete_count logger.info('COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count) print 'COLLECTION THREAD: lost %d tweets to stream rate limit' % l.limit_count print 'COLLECTION THREAD: stream stopped after %d tweets' % l.tweet_count if not l.error_code == 0: resp = db.set_collector_status(project_id, collector_id, collector_status=0) project_config_db.update({"_id" : ObjectId(collector_id)}, {'$set' : {'error_code': l.error_code}}) if not l.limit_count == 0: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set' : { 'stream_limit_loss.total': l.limit_count}}) if not l.rate_limit_count == 0: project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'rate_limit_count': 0}}) # Collection has been signaled & main program thread is running # TODO - Check Mongo for handle:ID pairs # Only call for new pairs if collectSignal and (threading.activeCount() == 1): # Names collection thread & adds to 
counter myThreadCounter += 1 myThreadName = 'collector-' + collection_type + '%s' % myThreadCounter termsList = collector['terms_list'] if termsList: print 'Terms list length: ' + str(len(termsList)) # Grab IDs for follow stream if collection_type == 'follow': """ TODO - Update Mongo terms w/ set for collect status 0 or 1 # Updates current stored handles to collect 0 if no longer listed in terms file stored_terms = doc['termsList'] for user in stored_terms: if user['handle'] not in termsList: user_id = user['id'] mongo_config.update({'module': 'collector-follow'}, {'$pull': {'termsList': {'handle': user['handle']}}}) mongo_config.update({'module': 'collecting-follow'}, {'$set': {'termsList': {'handle': user['handle'], 'id': user_id, 'collect': 0 }}}) # Loops thru current stored handles and adds list if both: # A) Value isn't set to None (not valid OR no longer in use) all_stored_handles = [user['handle'] for user in stored_terms] stored_handles = [user['handle'] for user in stored_terms if user['id'] and user['collect']] print 'MAIN: %d user ids for collection found in Mongo!' % len(stored_handles) """ # Loop thru & query (except handles that have been stored) print 'MAIN: Querying Twitter API for handle:id pairs...' logger.info('MAIN: Querying Twitter API for handle:id pairs...') # Initiates REST API connection twitter_api = API(auth_handler=auth) failed_handles = [] success_handles = [] # Loops thru user-given terms list for item in termsList: term = item['term'] # If term already has a valid ID, pass if item['id'] is not None: pass # Queries the Twitter API for the ID value of the handle else: try: user = twitter_api.get_user(screen_name=term) except TweepError as tweepy_exception: error_message = tweepy_exception.args[0][0]['message'] code = tweepy_exception.args[0][0]['code'] # Rate limited for 15 minutes w/ code 88 if code == 88: print 'MAIN: User ID grab rate limited. Sleeping for 15 minutes.' logger.exception('MAIN: User ID grab rate limited. 
Sleeping for 15 minutes.') time.sleep(900) # Handle doesn't exist, added to Mongo as None elif code == 34: print 'MAIN: User w/ handle %s does not exist.' % term logger.exception('MAIN: User w/ handle %s does not exist.' % term) item['collect'] = 0 item['id'] = None failed_handles.append(term) # Success - handle:ID pair stored in Mongo else: user_id = user._json['id_str'] item['id'] = user_id success_handles.append(term) print 'MAIN: Collected %d new ids for follow stream.' % len(success_handles) logger.info('MAIN: Collected %d new ids for follow stream.' % len(success_handles)) print 'MAIN: %d handles failed to be found.' % len(failed_handles) logger.info('MAIN: %d handles failed to be found.' % len(failed_handles)) logger.info(failed_handles) print failed_handles print 'MAIN: Grabbing full list of follow stream IDs from Mongo.' logger.info('MAIN: Grabbing full list of follow stream IDs from Mongo.') # Updates term list with follow values project_config_db.update({'_id': ObjectId(collector_id)}, {'$set': {'terms_list': termsList}}) # Loops thru current stored handles and adds to list if: # A) Value isn't set to None (not valid OR no longer in use) ids = [item['id'] for item in termsList if item['id'] and item['collect']] noncoll = [item['term'] for item in termsList if not item['collect']] termsList = ids else: terms = [item['term'] for item in termsList if item['collect']] noncoll = [item['term'] for item in termsList if not item['collect']] termsList = terms print 'Terms List: ' print termsList print '' print 'Not collecting for: ' print noncoll print '' logger.info('Terms list: %s' % str(termsList).strip('[]')) logger.info('Not collecting for: %s' % str(noncoll).strip('[]')) print 'COLLECTION THREAD: Initializing Tweepy listener instance...' 
logger.info('COLLECTION THREAD: Initializing Tweepy listener instance...') l = fileOutListener(tweetsOutFilePath, tweetsOutFileDateFrmt, tweetsOutFile, logger, collection_type, project_id, collector_id) print 'TOOLKIT STREAM: Initializing Tweepy stream listener...' logger.info('TOOLKIT STREAM: Initializing Tweepy stream listener...') # Initiates async stream via Tweepy, which handles the threading # TODO - location & language languages = collector['languages'] location = collector['location'] if languages: print '%s language codes found!' % len(languages) if location: print 'Location points found!' for i in range(len(location)): location[i] = float(location[i]) stream = ToolkitStream(auth, l, logger, project_id, collector_id, retry_count=100) if collection_type == 'track': stream.filter(track=termsList, languages=languages, locations=location, async=True) elif collection_type == 'follow': stream.filter(follow=termsList, languages=languages, locations=location, async=True) elif collection_type == 'none': stream.filter(locations=location, languages=languages, async=True) else: sys.exit('ERROR: Unrecognized stream filter.') collectingData = True print 'MAIN: Collection thread started (%s)' % myThreadName logger.info('MAIN: Collection thread started (%s)' % myThreadName) #if threading.activeCount() == 1: # print "MAIN: %d iteration with no collection thread running" % i #else: # print "MAIN: %d iteration with collection thread running (%d)" % (i, threading.activeCount()) # Incrementally delays loop if Mongo is offline, otherwise 2 seconds max_sleep_time = 1800 if exception: if runLoopSleep < max_sleep_time: runLoopSleep += 2 else: runLoopSleep = max_sleep_time print "Exception caught, sleeping for: %d" % runLoopSleep time.sleep(runLoopSleep) else: time.sleep( 2 )
def __init__(self):
    """Create the API client, load ``config.ini`` and open a redis client.

    The config file is read from the current working directory; the redis
    host and port come from its ``[DB]`` section.
    """
    self.api = API()

    parser = ConfigParser.ConfigParser()
    self.config = parser
    ini_path = os.path.abspath('config.ini')
    self.config.read(ini_path)

    db_host = self.config.get('DB', 'host')
    db_port = self.config.get('DB', 'port')
    self.r = redis.StrictRedis(host=db_host, port=db_port)
if __name__ == "__main__":
    # Endless crawl: each pass picks a random user id and fetches that user's
    # timeline, friend ids and follower ids through a caching API client.
    # Any exception is printed and the loop simply starts over.
    # NOTE(review): OAuth credentials are hard-coded here; consider loading
    # them from the environment or a config file.
    while True:
        try:
            auth = tweepy.OAuthHandler(
                "xg2hLKvf1nxw1TUALvx5xA",
                "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM")
            auth.set_access_token(
                "174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1",
                "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")

            # Two-level cache (DB + files); every layer uses timeout=-1.
            response_cache = DBFileCache(
                DBCache(timeout=-1),
                FileCache("cache", timeout=-1),
                timeout=-1)
            api_options = {
                'auth_handler': auth,
                'host': "api.twitter.com",
                'search_host': "search.twitter.com",
                'cache': response_cache,
                'secure': False,
                'api_root': "/1",
                'search_root': "",
                'retry_count': 0,
                'retry_delay': 0,
                'retry_errors': None,
                'parser': None,
            }
            twitterApi = API(**api_options)

            i = random.randint(1, 1000000000)
            u = handle_func(twitterApi.get_user, user_id=i)
            # Results are not used further here; the calls are made for
            # their effect on the client (and its cache).
            tweets = u.timeline(count=100, include_rts=1)
            friends = twitterApi.friends_ids(user_id=u.id)
            followers = twitterApi.followers_ids(user_id=u.id)
        except Exception as e:
            print(e)
def update_reply(text, reply_id, screen_name):
    """Post ``text`` as a reply to a status, prefixed with ``@screen_name``.

    :param text: reply body; converted with ``str``.
    :param reply_id: id of the status being replied to.
    :param screen_name: handle to mention; converted with ``str``.
    """
    client = API(get_oauth())
    mention = "@" + str(screen_name)
    message = mention + " " + str(text)
    client.update_status(status=message, in_reply_to_status_id=reply_id)
def setUp(self):
    """Build an authenticated API client with a small retry budget.

    The retry delay is 0 when ``use_replay`` is truthy and 5 otherwise.
    """
    self.auth = create_auth()
    self.api = API(self.auth)
    self.api.retry_count = 2
    if use_replay:
        delay = 0
    else:
        delay = 5
    self.api.retry_delay = delay
def __init__(self, api=None):
    """Store the API client.

    :param api: optional Tweepy ``API`` client; a default ``API()`` is
        created when the argument is missing or falsy.
    """
    if not api:
        api = API()
    self.api = api
class GetTwitterData():
    """Fetch tweets and follower lists via Tweepy and dump them to CSV files.

    Output and input locations come from the module-level names
    ``users_tweets_path``, ``users_friends_path`` and ``tweets_data_path``.

    NOTE(review): the CSV files are opened in binary mode and tweet text is
    encoded to UTF-8 bytes, which is the Python 2 ``csv`` convention.
    """

    def __init__(self, auth):
        """Keep the auth handler and build a Tweepy ``API`` client from it."""
        self.auth = auth
        self.api = API(self.auth)

    def _write_tweets_csv(self, screen_name, tweets):
        """Overwrite ``users_tweets_path`` with a header and one row per tweet."""
        # transform the tweepy tweets into a 2D array that will populate the csv
        out_tweets = [[screen_name, tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")]
                      for tweet in tweets]
        # write the csv
        with open(users_tweets_path, 'wb') as f:
            writer = csv.writer(f)
            writer.writerow(["screen_name", "id", "created_at", "text"])
            writer.writerows(out_tweets)

    def get_all_tweets(self, screen_name, tweet_count):
        """Page backwards through a user's timeline and write it to CSV.

        :param screen_name: handle whose timeline is downloaded.
        :param tweet_count: page size passed as ``count``
            (200 is the maximum allowed count).

        An empty timeline now produces a header-only CSV; previously it
        raised IndexError while reading the oldest tweet id.
        """
        # make initial request for most recent tweets
        new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count)
        alltweets = list(new_tweets)

        # keep grabbing tweets until there are no tweets left to grab
        while new_tweets:
            # id of the oldest tweet so far, less one
            oldest = alltweets[-1].id - 1
            print("getting tweets before %s" % (oldest))
            # all subsequent requests use the max_id param to prevent duplicates
            new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count, max_id=oldest)
            alltweets.extend(new_tweets)
            print("...%s tweets downloaded so far" % (len(alltweets)))

        self._write_tweets_csv(screen_name, alltweets)

    def new_get_all_tweets(self, screen_name, tweet_count):
        """Fetch a single timeline page for ``screen_name`` and write it to CSV.

        :param tweet_count: page size passed as ``count``.
        """
        new_tweets = self.api.user_timeline(screen_name=screen_name, count=tweet_count)
        self._write_tweets_csv(screen_name, new_tweets)

    def find_friends(self, screen_name):
        """Append the followers of ``screen_name`` to ``users_friends_path``.

        Follower ids are fetched page by page (50 per page), resolved to
        user objects, and written as ``[screen_name, id, follower_name]``.

        NOTE(review): despite the name, this queries ``followers_ids``.
        A header row is written for every page because the file is opened
        in append mode inside the loop; kept as-is for compatibility.
        """
        print("screen_name: " + screen_name)
        for id_page in Cursor(self.api.followers_ids, screen_name=screen_name, count=50).pages():
            print(id_page)
            print("ids are: " + str(len(id_page)))
            users = list(self.api.lookup_users(user_ids=id_page))
            friends_list = [user.screen_name for user in users]
            print("list of users\n")
            print(friends_list)
            # Take the id from each returned user: users/lookup does not
            # guarantee request order and omits unavailable accounts, so
            # pairing ids with names by position can mislabel rows.
            friends_list_output = [[screen_name, user.id, user.screen_name] for user in users]
            print(friends_list_output)
            with open(users_friends_path, 'ab') as f:
                writer = csv.writer(f)
                writer.writerow(["screen_name", "id", "friends"])
                writer.writerows(friends_list_output)
            # brief pause between pages
            time.sleep(1)

    def readfile(self):
        """Replay ``tweets_data_path`` and download data for every author.

        Each line is parsed as JSON; unparsable lines are skipped. For every
        parsed tweet the author's latest 5 tweets and follower list are
        fetched; failures in either step are printed and do not stop the run.

        :raises KeyError: if a parsed record has no ``user.screen_name``.
        """
        tweets_data = []
        with open(tweets_data_path, "r") as tweets_file:
            for line in tweets_file:
                try:
                    tweets_data.append(json.loads(line))
                except ValueError:
                    # not valid JSON - skip the line
                    continue
        print(len(tweets_data))

        for counter, tweet in enumerate(tweets_data, 1):
            screen_name = tweet['user']['screen_name']
            try:
                self.new_get_all_tweets(screen_name, 5)
            except Exception as e:
                print("error:\n")
                print(str(e))
            try:
                print(screen_name)
                self.find_friends(screen_name)
            except Exception as e:
                print("fail:\n")
                print(str(e))
            print(counter)
def __init__(self, queue=None, api=None):
    """Store the message queue and the API client.

    :param queue: optional queue object kept on ``self.messagequeue``.
    :param api: optional Tweepy ``API`` client; a default ``API()`` is
        created when the argument is missing or falsy.
    """
    self.messagequeue = queue
    if not api:
        api = API()
    self.api = api
# -*- coding: utf-8 -*-
'''
Created on 2011-8-27

@author: redswallow
'''
from tweepy.auth import OAuthHandler
from tweepy.api import API

# NOTE(review): application and user credentials are hard-coded below;
# consider loading them from the environment or a config file.
consumer_key = "o2K22DnJqSG0STjRbLUA"
consumer_secret = "SV7I5YxQ8ehDCEBnKmCHYMTJW0Z0MLt3kpEdW9KhaCo"
token = "25798843-PyBwBx4AWqjUuSAm9yoKQuSvEtZQR78IEsuB7xGw"
tokenSecret = "nWhpP3g44eciBs0Db5SXQc8HJ0G53Rd2v4sAGJy3aTU"

# Build an OAuth-authenticated API client.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(token, tokenSecret)
api = API(auth)

# Post a status update through the authenticated client.
api.update_status(u"oauth登录成功~~")
import sys
import random
import math
import time

# Must run before the tweepy imports so the bundled copy in ../lib/ is found.
sys.path.append("../lib/")

from tweepy import api, error
from tweepy.cache import FileCache, DBCache, DBFileCache
from tweepy.api import API
import tweepy
from tweepy.models import *
from tweeapi import APISingleton

if __name__ == "__main__":
    # Print the account's rate-limit status and exit. Any exception is
    # printed and the request is attempted again; sys.exit raises
    # SystemExit, which the handler below does not catch.
    # NOTE(review): OAuth credentials are hard-coded here; consider loading
    # them from the environment or a config file.
    while True:
        try:
            auth = tweepy.OAuthHandler(
                "xg2hLKvf1nxw1TUALvx5xA",
                "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM")
            auth.set_access_token(
                "174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1",
                "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k")
            api_options = {
                'auth_handler': auth,
                'host': 'api.twitter.com',
                'search_host': 'search.twitter.com',
                'cache': None,
                'secure': False,
                'api_root': '/1',
                'search_root': '',
                'retry_count': 0,
                'retry_delay': 0,
                'retry_errors': None,
                'parser': None,
            }
            twitterApi = API(**api_options)
            ret = twitterApi.rate_limit_status()
            print(ret)
            sys.exit(0)
        except Exception as e:
            print(e)
def __init__(self, api=None, interrupt=False, queue=None):
    """Store the API client, the interrupt flag and the output queue.

    :param api: optional Tweepy ``API`` client; a default ``API()`` is
        created when the argument is missing or falsy.
    :param interrupt: interrupt signal for ending ``stream.filter``.
    :param queue: queue that tweets are placed in.
    """
    if not api:
        api = API()
    # api access to Twitter
    self.api = api
    # interrupt signal for ending stream.filter
    self.interrupt = interrupt
    # place the tweets in a queue
    self.queue = queue
def __init__(self, api=None):
    """Store the API client and reset the dump bookkeeping.

    :param api: optional Tweepy ``API`` client; a default ``API()`` is
        created when the argument is missing or falsy.
    """
    if not api:
        api = API()
    self.api = api
    self.internal_list = []
    # Base paths for the data and status JSON dumps.
    self.filename_data = './Data/json_dump'
    self.filename_status = './Data_Status/json_dump'
    self.num_files_written = 0
config.read(CONF_INI_FILE) default = config['locations'] boundingbox = default[geo].split(',') boundingbox = [float(x) for x in boundingbox] if MODE == 0: out = rollfile(0) tries = 0 l = MyStreamListener() #l.output = out auth = OAuthHandler(keys['consumer_key'], keys['consumer_secret']) auth.set_access_token(keys['access_key'], keys['access_secret']) api = API(auth, wait_on_rate_limit=False, wait_on_rate_limit_notify=False) #l._api = api stream = Stream(auth, l) while True: list_terms = [ 'kill', 'news', 'fight', 'peace', 'elect', 'terror', 'earthquake', 'death', 'disaster', 'attack', 'major sports', 'shooting', 'crash', 'ISIS', 'PKK' ] try: #stream.filter(languages = ['en'], track=list_terms) #http://boundingbox.klokantech.com/ # boundingboxes = {} # boundingboxes['NYC'] = [-74.2852635,40.3161132,-73.50,40.9249936]
from tweepy import TweepError from tweepy.api import API from codecs import open with open('twitter_api_config.json') as f: apiConfig = json.load(f) ckey = apiConfig['ckey'] consumer_secret = apiConfig['consumer_secret'] access_token_key = apiConfig['access_token_key'] access_token_secret = apiConfig['access_token_secret'] # Twitter authentication. auth = OAuthHandler(ckey, consumer_secret) auth.set_access_token(access_token_key, access_token_secret) api = API(auth_handler=auth) def reindex(es, newIndex,count,outFile): query = { "query": { "function_score":{ "query":{ "bool": { "filter": [ { "term": { "start": "2017-05-24T21:16:27.396400-04:00" } } ]
def connect_to_api(auth):
    """Build an ``API`` client for ``auth`` and check its credentials.

    :param auth: authentication handler passed as ``auth_handler``.
    :return: the ``API`` client, after ``verify_credentials`` has been called.
    """
    client = API(auth_handler=auth)
    client.verify_credentials()
    return client
forward_ds =[1] backward_ds = [1] forward_min = backward_min = 0 backward = [] is_opt = 0 max_step = 6 if __name__ == "__main__": while 1: try: #outfile = sys.argv[1] #auth = tweepy.BasicAuthHandler('reza_shz', 'mehdireza') auth = tweepy.OAuthHandler("xg2hLKvf1nxw1TUALvx5xA", "MkX0lDUik0mJuc6nxserddbQDWd7ZTErQN6Tf0OhOM") auth.set_access_token("174566652-MOGbxytlmUHIN5tEMgl5rgqWdWaIQXYZ6XPyYKl1", "yem38OfoUbsoPZvOVr3k0n3X7JSUDYD8oxAKXvrJw6k") twitterApi = API(auth_handler=auth, host='api.twitter.com', search_host='search.twitter.com', cache=FileCache("cache", timeout = -1), secure=False, api_root='/1', search_root='', retry_count=0, retry_delay=0, retry_errors=None, parser=None) username1 = sys.argv[1] username2 = sys.argv[2] user1 = twitterApi.get_user(username1) #@UndefinedVariable user2 = twitterApi.get_user(username2) #@UndefinedVariable forward = [] forward_ds = [1] backward_ds = [1] forward_min = backward_min = 0 backward = [] is_opt = 0