def get_twitters():
    """Search Twitter for the comma-separated hashtags in the request and
    return the matching tweets as a JSON string.

    Reads the ``hashtags`` query parameter, URL-encodes ``#``/``@``, strips
    spaces and splits on commas. Returns ``[]`` (as JSON) when no tags are
    supplied or when the search fails.
    """
    twitters = []
    # Fix: default to '' (not False) so .replace() cannot raise
    # AttributeError when the 'hashtags' parameter is missing.
    raw = request.args.get('hashtags', '')
    tags = raw.replace('#', '%23').replace('@', '%40').replace(' ', '').split(',')
    # Drop empty fragments: ''.split(',') yields [''], which previously
    # passed the len(tags) >= 1 check and triggered a pointless search.
    tags = [t for t in tags if t]
    try:
        if tags:
            search_settings = TwitterSearchOrder()
            search_settings.set_include_entities(False)  # skip entity info
            search_settings.set_keywords(tags)
            search_on_twitter = TwitterSearch(
                consumer_key=APP_KEY,
                consumer_secret=APP_SECRET,
                access_token=OAUTH_TOKEN,
                access_token_secret=OAUTH_TOKEN_SECRET
            )
            for twitter in search_on_twitter.search_tweets_iterable(search_settings):
                tw = {
                    'text': twitter['text'],
                    'profile_image_url': twitter['user']['profile_image_url'],
                    'name': twitter['user']['name'],
                    'screen_name': twitter['user']['screen_name'],
                    'location': twitter['user']['location']
                }
                if tw not in twitters:  # de-duplicate identical tweets
                    twitters.append(tw)
            return json.dumps(twitters)
        else:
            return json.dumps([])
    except TwitterSearchException:
        # Any API/search error degrades to an empty result set.
        return json.dumps([])
def __init__(self, consumer_key, consumer_secret, access_token, access_token_secret, keyword, since_id, tweet_count=15):
    """Run a Japanese-language Twitter search for *keyword* and store the
    matching tweets on ``self._results``.

    :param since_id: only tweets newer than this id are requested when > 0
    :param tweet_count: page size requested from the API (default 15)
    """
    self._results = []
    self._i = 0
    print("since_id: {0}".format(since_id))
    try:
        order = TwitterSearchOrder()
        order.set_keywords([keyword])
        order.set_language('ja')
        order.set_include_entities(False)
        order.set_count(tweet_count)
        if since_id > 0:
            order.set_since_id(long(since_id))
        client = TwitterSearch(
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            access_token=access_token,
            access_token_secret=access_token_secret
        )
        for hit in client.search_tweets_iterable(order):
            self._results.append({
                'screen_name': hit['user']['screen_name'],
                'user_name': hit['user']['name'],
                'profile_image_url': hit['user']['profile_image_url'],
                'text': hit['text'],
                'created_at': hit['created_at'],
                'id': hit['id'],
            })
    except TwitterSearchException as e:
        print(e)
class TweetSearch:
    """Thin wrapper around the TwitterSearch client for English keyword searches."""

    def __init__(self):
        # NOTE(review): credentials are hard-coded — consider moving them
        # to configuration or environment variables.
        self.ts = TwitterSearch(
            consumer_key='uVp0jLzC043jvVxsoYtO7XnTy',
            consumer_secret='zHHqf6gaRGeLX9PS4YB4BMhcUo7p8dyI02cZLxVQOTnoHEG0gh',
            access_token='247768860-1BdrGZgXQibjaDSiZxGQ1MbjCxCEsM85gDFnRMjr',
            access_token_secret='ImetdaaKxq4uMvkQiMIxbGiR92ywqjYas52EZSXOyPu1t')

    def search(self, keywords):
        """Return an iterable of English tweets matching *keywords*,
        or None when the search raises."""
        print(keywords)
        query = TwitterSearchOrder()
        query.set_keywords(keywords)
        query.set_language('en')
        query.set_include_entities(False)
        tweets = None
        try:
            tweets = self.ts.search_tweets_iterable(query)
            print(tweets)
        except TwitterSearchException as e:
            print(e)
        return tweets
def jobInteraction(tag):
    """Search Twitter for the comma-separated terms in *tag* and return the
    matching Spanish-language tweets serialized as a JSON string."""
    keywords = tag.split(",")
    tweets = []
    try:
        order = TwitterSearchOrder()
        order.set_keywords(keywords)
        order.set_language('es')          # Spanish tweets only
        order.set_include_entities(True)  # keep entity information
        client = TwitterSearch(
            consumer_key='gDEFFAToqZ1j5cE9SgJkeqvBY',
            consumer_secret='jqKGAra9Kd0n4jwsQXkhairyxx0uv9D4iMme6AeE2NLDX3fPfz',
            access_token='17160146-FxfSx4Bdq7SvuENSgHvi175f7uyjwoHCHVMUYiJQP',
            access_token_secret='SREyq0DxHOurUY5E0AbT3kPDwl5IFDcPFmnehZjbaH5ab'
        )
        for tweet in client.search_tweets_iterable(order):
            tweets.append(tweet)
    except TwitterSearchException as e:
        print(e)
    print(len(tweets))
    return json.dumps(tweets)
def search(query='cheeky nandos ledge banter', max=5):
    """Return the ids of up to *max* tweets matching *query* (split on
    whitespace into keywords). Prints the error and returns None when the
    Twitter search fails."""
    terms = query.split()
    try:
        order = TwitterSearchOrder()
        order.set_keywords(terms)
        client = TwitterSearch(
            consumer_key=app.config['TWITTER_CONSUMER_KEY'],
            consumer_secret=app.config['TWITTER_CONSUMER_SECRET'],
            access_token=app.config['TWITTER_ACCESS_TOKEN'],
            access_token_secret=app.config['TWITTER_TOKEN_SECRET']
        )
        ids = []
        for tweet in client.search_tweets_iterable(order):
            ids.append(tweet['id'])
            max -= 1
            if not max:  # collected the requested number of ids
                break
        return ids
    except TwitterSearchException as e:
        print(e)
def search_results(words):
    """Print every tweet matching *words* that has more than 150 retweets."""
    try:
        order = TwitterSearchOrder()
        order.set_keywords(words)
        client = TwitterSearch(
            consumer_key=keys.consumer_key,
            consumer_secret=keys.consumer_secret,
            access_token=keys.access_token,
            access_token_secret=keys.access_token_secret
        )
        for tweet in client.search_tweets_iterable(order):
            # Only surface widely-retweeted tweets.
            if tweet['retweet_count'] > 150:
                print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))
                print('\n\n\n')
    except TwitterSearchException as e:
        print(e)
def search():
    """Collect and return up to 301 English tweets containing a space
    (which matches virtually every tweet)."""
    collected = []
    try:
        order = TwitterSearchOrder()
        order.set_keywords([" "])       # a lone space matches almost anything
        order.set_language("en")
        order.set_include_entities(False)
        client = TwitterSearch(
            consumer_key="zg9yQTGTT2oizk3XLMHGLzfpJ",
            consumer_secret="nmiwqRpWDX0oxTCUTro8sPeUVUXIZHW9O1VZcTb0mLyfHw51sc",
            access_token="700001043-oxm3LZ72y4WmWGRqY66QjV0SzZoHGy5OGgwic26M",
            access_token_secret="hGJZWTb5bjGFSiuIQrff5UajKdlyXcp7Lyun5SJzq05Su",
        )
        for index, tweet in enumerate(client.search_tweets_iterable(order)):
            collected.append(tweet)
            if index == 300:  # stop after 301 tweets, like the original cap
                break
        return collected
    except TwitterSearchException as e:
        print(e)
def search(text, limit):
    """Return the text of up to *limit* English tweets matching *text*.

    NOTE(review): *text* goes straight into set_keywords(); presumably the
    callers pass a list of keywords — confirm, since a bare string is
    handled differently by the TwitterSearch library.
    """
    tweets_list = []
    try:
        order = TwitterSearchOrder()
        order.set_keywords(text)
        order.set_language('en')
        order.set_include_entities(False)
        client = TwitterSearch(
            consumer_key='tbHIo3PImh0pSIETLlO8wIKj4',
            consumer_secret='QmzJYSAp9rw6O7tDJATkm7Avq0OBRTfZbdNf3BjEmDmdDB1jT2',
            access_token='1315897358-IkDrUD4Zdy6HP3FjF4UxdBqICEZOU91Lys95FGu',
            access_token_secret='nHROttog8743ZmeBWeldvh24EHwXtW4h1Z69o1GsgV2zE'
        )
        cnt = 0
        for tweet in client.search_tweets_iterable(order):
            cnt += 1
            if cnt > limit:
                break
            tweets_list.append(tweet['text'])
        print('{0} tweets'.format(cnt))
    except TwitterSearchException as e:
        print(e)
    return tweets_list
def serve_twitter_news(self):
    """Fetch one batch of English tweets about the (module-level) `topic`
    and push it to the client as JSON.

    Sends at most `batch_size` updates, then stops. Twitter API errors are
    deliberately swallowed — serving is best-effort.
    """
    try:
        order = TwitterSearchOrder()
        order.set_keywords([topic])
        order.set_language('en')
        order.set_include_entities(False)
        client = TwitterSearch(
            consumer_key=my_consumer_key,
            consumer_secret=my_consumer_secret,
            access_token=my_access_token,
            access_token_secret=my_access_token_secret
        )
        batch_size = 5
        updates = []
        seen = 0
        for tweet in client.search_tweets_iterable(order):
            update = '@%s: %s' % (
                tweet['user']['screen_name'].encode('utf-8').strip(),
                tweet['text'].encode('utf-8').strip()
            )
            updates.append(update)
            logging.debug(update)
            seen += 1
            if seen >= batch_size:
                self.send_JSON({'update': updates})
                break
    except TwitterSearchException:
        # Best-effort: ignore API failures entirely.
        pass
def twitterStreaming():
    """Poll Twitter for English 'Swissquote' tweets newer than a fixed id
    and forward each tweet's text to serveurStreaming; marks the connection
    dead on a search error."""
    from time import sleep
    sleep(5)  # give the streaming server a moment before polling
    try:
        order = TwitterSearchOrder()
        order.set_keywords(['Swissquote'])
        order.set_language('en')
        order.set_include_entities(False)
        lastID = 569803155141206016  # only tweets newer than this id
        order.set_since_id(lastID)
        client = TwitterSearch(
            consumer_key='a',
            consumer_secret='a',
            access_token='a-a',
            access_token_secret='b'
        )
        for tweet in client.search_tweets_iterable(order):
            print('[%s]@%s tweeted: %s' % (tweet['created_at'], tweet['user']['screen_name'], tweet['text']))
            if lastID < tweet['id']:
                lastID = tweet['id']  # remember the newest id seen
            serveurStreaming.send(tweet['text'])
    except TwitterSearchException as e:
        print(e)
        serveurStreaming.isConnectionAlive = False
def get(self, keyword=None, lang="en", max=20):
    """Print up to *max* tweets matching *keyword* in language *lang*.

    Aborts the request with HTTP 500 on a Twitter API error; always
    returns False.
    """
    if keyword:
        try:
            order = TwitterSearchOrder()
            order.set_keywords([keyword])
            order.set_language(lang)
            order.set_include_entities(False)
            client = TwitterSearch(
                Config.settings['twitter']['consumer_key'],
                Config.settings['twitter']['consumer_secret'],
                Config.settings['twitter']['access_token'],
                Config.settings['twitter']['access_token_secret']
            )
            counter = 0
            sleep_at = max if max is not None else 20
            sleep_for = 30  # unused while the sleep call stays commented out
            for tweet in client.search_tweets_iterable(order):
                print(tweet)
                counter += 1
                if counter >= sleep_at:  # stop once the quota is reached
                    counter = 0
                    break
        except TwitterSearchException as e:
            abort(500)
    return False
def pull_tweet_responses(username, tweet_id):
    """Save to MySQL every English reply to *tweet_id* that mentions *username*.

    Queries Twitter for tweets mentioning @username newer than tweet_id,
    keeps only direct (non-retweet) replies to that tweet, and writes each
    one out via write_response_to_mysql().
    """
    try:
        order = TwitterSearchOrder()
        order.set_keywords(['@' + username])
        order.set_language('en')
        order.set_since_id(tweet_id)
        client = TwitterSearch(
            consumer_key=api_key,
            consumer_secret=api_secret,
            access_token=access_token_key,
            access_token_secret=access_token_secret
        )
        for tweet in client.search_tweets_iterable(order):
            is_reply = tweet['in_reply_to_status_id'] == tweet_id
            is_retweet = tweet['text'][:2] == 'RT'
            if is_reply and not is_retweet:
                write_response_to_mysql(tweet)
    except TwitterSearchException as e:
        print('\nTweet id: ' + str(tweet_id))
        print(e)
def printUser(username):
    """Print the text of up to 51 tweets from *username*'s timeline.

    Sleeps 60s after every 5th API query (via the TwitterSearch callback)
    to stay under rate limits. Returns 1 on success, 0 on a search error.
    """
    try:
        timeline = TwitterUserOrder(username)
        client = TwitterSearch(
            consumer_key='1kj4GBRevJITV4S40kLXGHVG2',
            consumer_secret='c80dJF41IwQV2G4ynR8VYblMQU15M4bc8OFg3aG6l8Y0aoSFhU',
            access_token='1708110452-e3unR8gR7WRMGDoCh3aZutMPL3bFBLFlqHz8tzy',
            access_token_secret='kkiZDDp8KXLB8cRDwsMqBDc5IxqiaVXSmbQ2XtZEij0tl'
        )

        def rate_limit_pause(current_ts_instance):
            # Pause a minute after every 5th query to avoid rate limiting.
            queries, tweets_seen = current_ts_instance.get_statistics()
            if queries > 0 and (queries % 5) == 0:
                time.sleep(60)

        count = 0
        for tweet in client.search_tweets_iterable(timeline, callback=rate_limit_pause):
            if count > 50:
                break
            content = tweet['text'].encode('utf-8')
            print(content)
            count += 1
        return 1
    except TwitterSearchException as e:
        print(e)
        return 0
def perform_search(request):
    """Create a page that counts hashtags.

    On POST, searches Twitter for the quoted "#<search>" phrase, tallies
    posts, users and hashtag frequencies, persists the aggregates, and
    renders the results page. Returns the error text if the search fails.
    """
    tag_to_search = ""
    if request.method == "POST":
        tag_to_search = request.POST["search"]
    keyword = '"#' + tag_to_search + '"'  # exact-phrase hashtag query
    users = []
    postCount = 0
    hashCount = Counter()
    uniqueHashCount = Counter()
    try:
        order = TwitterSearchOrder()
        order.set_keywords([keyword])
        order.set_include_entities(True)  # entities carry the hashtag info
        # Placeholder credentials — real secrets are not checked in.
        client = TwitterSearch(consumer_key="xxx", consumer_secret="yyy",
                               access_token="qqq", access_token_secret="rrr")
        for tweet in client.search_tweets_iterable(order):
            postCount += 1
            add_tweet(tweet, users)  # organize/record for later access
        count_hashtags(hashCount, uniqueHashCount, users)
        new_id = save_data(keyword.upper(), hashCount, uniqueHashCount, postCount, len(users))
    except TwitterSearchException as e:
        return str(e)
    return search(request, new_id)
def retrieveTweets(keyword):
    """Return an HTML snippet containing up to two tweets matching *keyword*.

    Fix: ``htmlstring`` is initialised before the try block, so the final
    ``return htmlstring`` can no longer raise NameError when the search
    setup fails before the assignment.
    """
    htmlstring = ""
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([keyword])
        tso.set_language('en')
        tso.set_include_entities(False)
        ts = TwitterSearch(
            consumer_key='dxDoYB875ZUsvgPtp8EVDkyq6',
            consumer_secret='6v4GiG1B3zKmJOsYPEtb0b39lv9da7iu7pIdAANyIoisoNrtZY',
            access_token='2157789854-Fwr0uDJQ23twqSyxPEH0VnPwafQvpay8K2z7aFQ',
            access_token_secret='q9S6ECBpBv1RMBG8iNT8cYdoJvQAoIMZfMHAivs5Fh0PQ')
        print("lolpls")
        i = 0
        for tweet in ts.search_tweets_iterable(tso):
            htmlstring += "<div><strong><a href='http://twitter.com/%s'>@%s</a></strong> %s" % (
                tweet['user']['screen_name'], tweet['user']['screen_name'], tweet['text']) + '</div>'
            i += 1
            if i > 1:  # keep only the first two tweets
                break
    except TwitterSearchException as e:
        print(e)
    return htmlstring
def getTweets(username):
    """Return up to 300 English tweets (text only, ASCII-filtered) that
    match *username*, restricted to tweets older than two days ago."""
    feeds = []
    try:
        order = TwitterSearchOrder()
        order.set_keywords([username])
        order.set_language('en')
        order.set_count(50)  # 50 results per API page
        order.set_include_entities(False)
        order.set_until(date.today() - timedelta(days=2))
        client = TwitterSearch(
            consumer_key='%%%',
            consumer_secret='^^^',
            access_token='&&&',
            access_token_secret='@@@'
        )
        fetched = 0
        for tweet in client.search_tweets_iterable(order):
            if fetched == 300:  # hard cap on collected tweets
                break
            feeds.append(str(tweet['text'].encode('ascii', 'ignore')))
            fetched += 1
    except TwitterSearchException as e:
        print(e)
    return feeds
def mainLoop():
    """Search Twitter for keyword1 (module-level) and append any tweet not
    already present in the CSV log.

    NOTE(review): only keyword1 is actually queried; keyword2 appears in
    the status message but not in the search — confirm that is intended.
    """
    try:
        order = TwitterSearchOrder()
        order.set_keywords([keyword1])
        order.set_language('en')
        order.set_include_entities(False)
        client = TwitterSearch(
            consumer_key=consumerKey,
            consumer_secret=consumerSecret,
            access_token=accessToken,
            access_token_secret=accessSecret
        )
        print("Checking for new tweets that match keywords: %s or %s" % (keyword1, keyword2))
        for tweet in client.search_tweets_iterable(order):
            username = (tweet['user']['screen_name']).encode('ascii', 'replace')
            tweetText = (tweet['text']).encode('ascii', 'replace')
            date = (tweet['created_at']).encode('ascii', 'replace')
            row = [username, tweetText, date]
            # Only log tweets that are not already in the CSV.
            if isStringinCSV(row) == False:
                print("New Tweet!")
                writeToCSV(row)
        print("Check complete.")
    except TwitterSearchException as e:
        print(e)
def crawl(filename, keywords, language):
    """Append tweets matching *keywords* (in *language*) to *filename* as
    tab-separated lines: @uuid, uuid, random label, tweet text.

    Fix: the output file is now closed in a ``finally`` clause, so it is no
    longer leaked when a non-TwitterSearchException escapes the loop.
    """
    f = codecs.open(filename, "a", "utf-8")
    try:
        try:
            tso = TwitterSearchOrder()
            tso.set_keywords(keywords)
            tso.set_language(language)
            tso.set_include_entities(False)  # skip entity information
            ts = TwitterSearch(
                consumer_key='MozbqzFag8UQMbuw9qkuyG7Fm',
                consumer_secret='c4m8EKOwQb90A3nLLySKSEkV7fVXe8taZq4IjgDrMVKihbNW4s',
                access_token='2684788650-VOzUZGhPItlgye6w5LhX5QMevWLK8WTALcxe8KM',
                access_token_secret='9IeW0F8XFnZ7FV5sCyZIahLEZBQTkzwO4L0q3vqRkl4je'
            )
            for tweet in ts.search_tweets_iterable(tso):
                tweet_text = tweet['text'].replace("\n", " ")  # one tweet per line
                id1 = uuid.uuid4()
                id2 = uuid.uuid4()
                label = random_label()
                f.write('@%s\t%s\t%s\t%s\n' % (id1, id2, label, tweet_text))
        except TwitterSearchException as e:
            print(e)
    finally:
        f.close()
def get_tweet(token_num, user, clan):
    """Fetch *user*'s timeline with credential set *token_num* and insert
    each tweet (serialized as JSON) into the `tweets` table, tagged with
    *clan* and the current timestamp.

    NOTE(review): there is no TwitterSearchException handling here — an API
    error propagates to the caller; confirm that is intended.
    """
    fetched_at = time.strftime('%Y-%m-%d %H:%M:%S')
    creds = access_tokens[token_num]
    client = TwitterSearch(
        consumer_key=creds['consumer_key'],
        consumer_secret=creds['consumer_secret'],
        access_token=creds['access_token'],
        access_token_secret=creds['access_token_secret'],
    )
    timeline = TwitterUserOrder(user)
    for tweet in client.search_tweets_iterable(timeline):
        cursor.execute(
            'INSERT IGNORE INTO tweets (date, name, clan, tweet) VALUES (%s, %s, %s, %s)',
            (fetched_at, user, clan, json.dumps(tweet),)
        )
def HashTracker():
    """Count and print running totals of negative, then positive, tweets
    tagged #lol (two separate attitude-filtered searches)."""
    try:
        negative_query = TwitterSearchOrder()
        negative_query.set_keywords(["#lol"])
        negative_query.set_negative_attitude_filter()

        positive_query = TwitterSearchOrder()
        positive_query.set_keywords(["#lol"])
        positive_query.set_positive_attitude_filter()

        client = TwitterSearch(
            consumer_key='oiqPmaj6hTVywkKIizDzw50l8',
            consumer_secret='jHSBDW8E1doOnKKfabdMEnJFIIU0UCz9ufVwvhNhIDNu0Hessq',
            access_token='42780587-4kuFLgjRn0sq4EyE1hUqdsXhkPRMt0SwvFfHsv3Dr',
            access_token_secret='MuljzOQYyOyaI4A0098vVDZc6xTcBeCfRSi0iUbYDMrDc')

        neg = 0
        for _ in client.search_tweets_iterable(negative_query):
            neg += 1
            print(neg)  # running negative count

        pos = 0
        for _ in client.search_tweets_iterable(positive_query):
            pos += 1
            print(pos)  # running positive count
    except TwitterSearchException as e:
        print(e)
def coleta_tweets(query, max = 3000):
    """Collect tweets for *query* and append them to '<query>.csv'.

    Receives the search term and the maximum number of tweets per run and
    writes the results (user, time, text, latitude, longitude) to a
    pipe-delimited CSV file without overwriting existing data.
    """
    # counter of tweets written so far
    i = 0
    # the search term
    search = query
    # open a CSV file named after the search term; mode "a" means every new
    # record is appended at the end — no data is overwritten
    with open(search+'.csv', 'a') as outf:
        writer = csv.writer(outf,delimiter='|')  # use | as the field delimiter
        writer.writerow(['user','time','tweet','latitude','longitude'])
        try:
            tso = TwitterSearchOrder()
            tso.set_keywords([search])
            tso.set_language('pt')  # Portuguese tweets only
            ts = TwitterSearch(
                access_token = '',
                access_token_secret = '',
                consumer_key = '',
                consumer_secret = ''
            )
            for tweet in ts.search_tweets_iterable(tso):
                lat = None
                long = None
                time = tweet['created_at']  # UTC-based timestamp
                user = tweet['user']['screen_name']
                # normalise accents away and drop non-ASCII characters
                tweet_text = unicodedata.normalize('NFKD',tweet['text'].strip()).encode('ascii', 'ignore')
                #tweet_text = tweet['text'].strip().encode('ascii', 'ignore')
                # collapse the tweet onto a single line
                tweet_text = ''.join(tweet_text.splitlines())
                print i,time,
                # skip (0.0, ...) coordinates, which are noise
                if tweet['geo'] != None and tweet['geo']['coordinates'][0] != 0.0:
                    lat = tweet['geo']['coordinates'][0]
                    long = tweet['geo']['coordinates'][1]
                    print('@%s: %s' % (user, tweet_text)), lat, long
                else:
                    print('@%s: %s' % (user, tweet_text))
                writer.writerow([user, time, tweet_text, lat, long])
                i += 1
                if i > max:
                    return()
        except TwitterSearchException as e:  # just show the error type, if any
            print(e)
class TwitterScrape(ScrapeHelper):
    """Scraper backed by the TwitterSearch wrapper around Twitter's
    standard search APIs."""

    def __init__(self, helperObject):
        """Build the authenticated TwitterSearch client from the API tokens
        in helperObject's configuration and reset the scrape state."""
        tokens = helperObject.moreConfig["apiTokens"]
        self.ts = TwitterSearch(
            consumer_key=tokens["twitter_consumer_key"],
            consumer_secret=tokens["twitter_consumer_secret"],
            access_token=tokens["twitter_access_token"],
            access_token_secret=tokens["twitter_access_token_secret"],
        )
        self.domain = "twitter"
        self.actualPost = None
        self.postTime = None
        self.resultsFound = False

    def scrapeIt(self, helperObject):
        """Search Twitter for the current term and hand every hit (status
        link + timestamp) to helperObject.prepareDbData()."""
        try:
            order = TwitterSearchOrder()
            order.set_keywords([helperObject.currentlySearchingFor])
            order.set_include_entities(False)
            for tweet in self.ts.search_tweets_iterable(order):
                # The status link alone is enough to reach the tweet itself
                # in a browser.
                self.resultsFound = True
                self.actualPost = "https://twitter.com/statuses/" + str(tweet["id"])
                self.postTime = str(tweet["created_at"].encode("utf-8"))
                print("@" + str((tweet["user"]["screen_name"]).encode("utf-8")) + " tweeted " + str(
                    tweet["text"].encode("utf-8")
                ) + "and the time was " + self.postTime + " and the id of the tweet is " + self.actualPost)
                helperObject.prepareDbData(self.domain, self.actualPost, self.postTime)
            if not self.resultsFound:
                print("\nBooo...! the lad's missing !")
            print("\n\n")
        except TwitterSearchException as e:
            print(e)
def twitSearch(keywords):
    """Build a keyword search (entities included, any language) and return
    the lazy tweet iterable, so callers can consume results later without
    immediately burning rate limit."""
    order = TwitterSearchOrder()
    order.set_keywords(keywords)
    order.set_include_entities(True)
    client = TwitterSearch(
        consumer_key=private.consumer_key,
        consumer_secret=private.consumer_secret,
        access_token=private.access_token,
        access_token_secret=private.access_token_secret
    )
    return client.search_tweets_iterable(order)
def getTweets(keywords):
    """Fetch up to 1000 Portuguese tweets matching *keywords* (a
    space-separated string) and append each one, with its timestamp, to
    ``tweets.txt``.

    Fix: the output file is opened once via a context manager instead of
    being re-opened and re-closed for every single tweet.
    """
    limit_count = 1000  # hard cap on the number of tweets fetched
    keywords = keywords.split(" ")  # turn the input into a keyword list
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords(keywords)
        tso.set_language('pt')  # Portuguese tweets only
        tso.set_count(100)      # 100 results per page
        tso.set_include_entities(False)
        ts = TwitterSearch(
            consumer_key='VqUEyEu7lXO5z5lWqoTkYOUmZ',
            consumer_secret='aiGUuiCwWNrYherJ5USuPkeZi3WMdCJj0ZsIGtWSHlZgPeqpmI',
            access_token='490611801-3ygNslO3ZvKXsGm1wZA1AdIwKO858jAsa66orMbd',
            access_token_secret='NZKkVwOYnz5BR1rf34PLHuODDnYlIaf52fbNBcMuUvU7b'
        )
        with open('tweets.txt', 'a') as saveFile:
            for tweet in ts.search_tweets_iterable(tso):
                print( 'at %s' % tweet['created_at'], tweet['text'])
                date = tweet['created_at']
                info = tweet['text'] + " " + date
                data2Save = unicodedata.normalize('NFKD', info).encode('utf-8','ignore')
                saveFile.write(data2Save)
                saveFile.write('\n')
                limit_count -= 1
                if limit_count == 0:
                    break
    except TwitterSearchException as e:
        print(e)
def Tweety():
    """Walk @EmreOvunc's timeline and hand every tweet containing
    '#TweetyBot' to CheckTweets() (with a fixed 13-character prefix of the
    formatted line stripped); stop once CheckTweets returns 1."""
    try:
        timeline = TwitterUserOrder('EmreOvunc')
        client = TwitterSearch(
            consumer_key='XX',
            consumer_secret='XX',
            access_token='XX',
            access_token_secret='XX'
        )
        for tweet in client.search_tweets_iterable(timeline):
            formatted = '@%s : %s \n' % (tweet['user']['screen_name'], tweet['text'])
            # Only forward tweets that carry the bot's hashtag.
            if '#TweetyBot' in formatted:
                if CheckTweets(formatted[13:]) == 1:
                    break
    except TwitterSearchException as error:
        print(error)
def collectTweets(file,keywords,collection,numTweets):
    """Stream English tweets matching *keywords* into a CSV file and the
    given DB *collection*, stopping once *numTweets* have been collected.

    :param file: CSV output path; rewritten from scratch when "Test" is in
        its name, otherwise appended to
    :param keywords: whitespace-separated search terms (lower-cased)
    :param collection: collection handle passed through to insertIntoDB()
    :param numTweets: stop after this many tweets (tracked via global count)
    """
    # Only write the CSV header when the file does not already exist.
    header = False
    if os.path.exists(file):
        header = True
    #open a csv file for writing
    if "Test" in file:
        csvfile= open(file, 'wb')
    else:
        csvfile= open(file, 'ab')
    writer = writeHeader(csvfile,header)
    # Normalise the search terms to lower case.
    names = keywords.split()
    entity =[]
    for keyowrd in names:
        entity.append(keyowrd.lower())
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords(entity)
        tso.set_language('en')
        tso.set_include_entities(False) # don't give all the entity information
        # create a TwitterSearch object with our secret tokens
        ts = TwitterSearch( consumer_key = '', consumer_secret = '', access_token = '', access_token_secret = '' )
        # NOTE(review): `count` is presumably incremented inside
        # insertIntoDB() — confirm, it is only read here.
        global count
        for tweet in ts.search_tweets_iterable(tso):
            tweetText = tweet['text']
            # Strip links and non-ASCII characters before storing.
            tweetText = removeHttps(tweetText.encode('ascii','ignore'))
            insertIntoDB(collection,tweetText,writer,keywords)
            if count == numTweets:
                break;
        print("Total tweets collected for "+keywords+ " are "+ str(count))
    except TwitterSearchException as e: # handle errors if there are some
        print(e)
    finally:
        csvfile.close()
def tweetFetch(keywords):
    """Fetch up to 10 English tweets matching *keywords*, annotate each with
    a sentiment score tag, and return the de-duplicated list.

    Fixes: uses the modern ``except ... as e`` syntax (the comma form is
    Python-2-only and inconsistent with the rest of the codebase) and
    returns the ``filtered_tweets`` list, which was previously built and
    then silently discarded.
    """
    filtered_tweets = []
    try:
        print("fetching from Twitter")
        tso = TwitterSearchOrder()
        tso.set_keywords(keywords)
        tso.set_count(10)
        tso.set_language('en')           # English tweets only
        tso.set_include_entities(False)  # skip entity information
        ts = TwitterSearch(consumer_key='GML650EJDB9kGTmLOV1fmiKnE',
                           consumer_secret='Lssnyk1dDmPYS86USSCjD8SBV3HAJPjUJQNemAhowv6jCwIVez',
                           access_token='3223420976-qydbZ9X79cLsKhUVdqTBOs7Rg9sh3sTjUNzuc3B',
                           access_token_secret='bs4LDtdKkkCTpTZyqm49hSF1o2igfFkw2eSHvYbLigAsM'
                           )
        count = 0
        tweet_limit = 9  # limit to the first 10 results
        for tweet in ts.search_tweets_iterable(tso):
            sent_resp = sentimentAnalysis(tweet['text'])
            count += 1
            tweet_object = {'post': tweet['text']}
            if 'score_tag' in sent_resp:
                tweet_object['score_tag'] = sent_resp['score_tag']
            if tweet_object not in filtered_tweets:  # de-duplicate
                filtered_tweets.append(tweet_object)
            print(count)
            if count > tweet_limit:
                break
    except TwitterSearchException as e:
        print(e)
    return filtered_tweets
def search(searchTerm, lang=LANGUAGE):
    """Return the lazy iterable of tweets matching *searchTerm* in *lang*,
    or -1 when the Twitter search could not be set up."""
    try:
        order = TwitterSearchOrder()
        order.set_keywords([searchTerm])
        order.set_language(lang)
        order.set_include_entities(True)
        client = TwitterSearch(
            consumer_key=TWIT_CONSUMER_KEY,
            consumer_secret=TWIT_CONSUMER_SECRET,
            access_token=TWIT_ACCESS_TOKEN,
            access_token_secret=TWIT_ACCESS_SECRET
        )
        return client.search_tweets_iterable(order)
    except TwitterSearchException as e:
        print(e)
        return -1
def get(self, crowd_request, data, **kwargs):
    """Grab the first 'earthquake' tweet and return its id together with the
    template to render.

    Fix: ``tweet_id`` is initialised to None before the try block — the
    final return previously raised NameError whenever the search yielded
    nothing or threw before the first tweet.
    """
    tweet_id = None
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords(['earthquake'])
        tso.set_include_entities(False)  # skip entity information
        ts = TwitterSearch(
            consumer_key='O92NoyCEQsUq7swRKg',
            consumer_secret='dD7NP6ZTOv9KX28Iw6O9gtgu5MpbzTG5qyfdd7S99Y',
            access_token='46171206-lfcESnE0WfZ8iCb4QEfreOco3PuLodM0p2lp3gC9s',
            access_token_secret='CURU7xIS2InzDHF5LtPpZ8gLXjWg0M3okMbmCHrIWdI'
        )
        for tweet in ts.search_tweets_iterable(tso):
            print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))
            tweet_id = tweet["id"]
            break  # only the first tweet is needed
    except TwitterSearchException as e:
        print(e)
    return {"status": "OK", "tweet_id": tweet_id, "template": "pick_tweet_hashtags.html"}
def getTweets(keywords):
    """Return a list of dicts describing tweets matching any of *keywords*
    (OR-combined), or None when the search fails.

    Fix: the formatted timestamp no longer reuses the name ``ts``, which
    previously clobbered the TwitterSearch client inside the loop.
    """
    data = []
    try:
        ts = TwitterSearch(
            consumer_key=environ['CK'],
            consumer_secret=environ['CS'],
            access_token=environ['AT'],
            access_token_secret=environ['ATS']
        )
        tso = TwitterSearchOrder()
        tso.set_keywords(keywords, or_operator=True)
        for tweet in ts.search_tweets_iterable(tso):
            created = time.strftime('%m-%d-%y %H:%M',
                                    time.strptime(tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y'))
            data.append({'date': created,
                         'text': tweet['text'],
                         'avatar': tweet['user']['profile_image_url_https'],
                         'user': tweet['user']['screen_name'],
                         'id': tweet['id'],
                         'rt': tweet['retweet_count']})
        return data
    except TwitterSearchException as e:
        print(e)
sarcastic_tso.set_include_entities(True) sarcastic_tso.arguments.update({"tweet_mode": "extended"}) if args.serious_path: serious_tso = TwitterSearchOrder() serious_tso.set_keywords(["-#sarcasm"]) # query tweets w/o #sarcasm serious_tso.set_language('en') serious_tso.set_include_entities(True) serious_tso.arguments.update({"tweet_mode": "extended"}) # query twitter API and populate tweet lists try: ts = TwitterSearch(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET, access_token=ACCESS_TOKEN, access_token_secret=ACCESS_SECRET) if args.sarcastic_path: for sarcastic_tweet in ts.search_tweets_iterable(sarcastic_tso): if not sarcastic_tweet['full_text'].lower().startswith('rt'): sarcastic_tweets.append({ 'id': sarcastic_tweet['id'], 'urls': not not sarcastic_tweet['entities']['urls'], 'media': "media" in sarcastic_tweet["entities"], 'text': sarcastic_tweet['full_text'] }) if args.serious_path: for serious_tweet in ts.search_tweets_iterable(serious_tso): if not serious_tweet['full_text'].lower().startswith('rt'): serious_tweets.append({
def convert(schoolName,key1,key2,key3,key4):
    """Look up a campus location, pull up to 50 tweets geocoded near it, and
    print an average Indico sentiment score for them.

    NOTE(review): Python 2 code (reader.next(), print statements,
    string.replace). key1..key4 are the Twitter API credentials.
    Returns 0.5 (neutral) when no tweets were found; otherwise prints the
    average without returning it — confirm whether a return is missing.
    """
    #importing the databases of campuses & locations
    campus_loc = {} #{name:[long:lat]}
    with open('campuses.csv') as f:
        reader = csv.reader(f)
        reader.next()  # skip the CSV header row
        for row in reader:
            # column 1 is the campus name; columns 3/4 its longitude/latitude
            campus_loc[row[1]] = [float(row[3]), float(row[4])]
    school = schoolName #takes input and renames it for convenience's sake since every other variable name is schoolName #whoops
    # Dead code, kept as a bare string literal in the original:
    """
    counter = 0 #this entire block will never be run unless we take user input
    while school not in campus_loc.keys():
        counter += 1
        print "Sorry! Doesn't look like we have that school. Try again?"
        if counter > 5:
            print "Wow, you really suck at this."
        school = raw_input("> ")
    """
    location = campus_loc[school] #gets the location from campus_loc using the school name
    ############## begin Twitter API call now
    # NOTE(review): if TwitterSearch setup itself raises, `collegeTweets` is
    # left unbound and the code after the except block fails with NameError.
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords([' ']) # match-anything keyword; we only filter by location
        # set_geocode takes (lat, lng, radius); the CSV stored [lng, lat]
        tso.set_geocode(location[1],location[0],1)
        #tso.set_language('de') # we want to see German tweets only
        tso.set_include_entities(False) # and don't give us all those entity information
        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key = str(key1),
            consumer_secret = str(key2),
            access_token = str(key3),
            access_token_secret = str(key4)
            # (old commented-out credential sets removed — they were leaked
            # secrets and should be revoked)
        )
        # this is where the fun actually starts :)
        collegeTweets = []
        counter = 0
        for tweet in ts.search_tweets_iterable(tso):
            if counter < 50:  # cap the sample at 50 tweets
                collegeTweets.append(tweet['text'].encode('ascii','ignore')) #drops any non-ASCII characters
                collegeTweets[-1] = string.replace(collegeTweets[-1],"#","") #pulls out pound signs so hashtags read as words
                collegeTweets[-1] = string.replace(collegeTweets[-1],"\n","") #pulls out newlines that break the sentence
                collegeTweets[-1] = string.replace(collegeTweets[-1],"@","") #pulls out any mentions
                collegeTweets[-1] = remove_http(collegeTweets[-1],"http") #pulls out links (mostly Instagram picture links)
                counter += 1
                # NOTE(review): appends a literal 'a' to every tweet —
                # purpose unclear, confirm before relying on the text.
                collegeTweets[-1] += "a"
    except TwitterSearchException as e:
        print(e)
    ############### begin Indico API call now
    if not collegeTweets:
        return 0.5  # neutral sentiment when no tweets were found
    # NOTE(review): hard-coded Indico API key — move to config and revoke.
    indicoio.config.api_key = "f09f509655f721e3adac6df5b35abfed"
    api_key_Lisa = "f09f509655f721e3adac6df5b35abfed"
    sentementCollegeTweets = sentiment(collegeTweets) #array of per-tweet Indico sentiment scores
    # plain arithmetic mean of the sentiment scores
    average = 0.0
    for i in sentementCollegeTweets:
        average += i
    average = average/len(sentementCollegeTweets)
    print average
access_token='', access_token_secret='') # create twitter search token SelfieTweets = pickle.load(open("SelfieTweets", "r")) usercount = 0 for selfietweet in SelfieTweets[0:len(SelfieTweets)]: usercount += 1 print usercount # setting user to search: user = int(selfietweet["UserID"]) print user try: tuo = TwitterUserOrder(user) # create a TwitterSearchOrder object tweets = [] count = 0 for tweet in ts.search_tweets_iterable(tuo): #search tweets count += 1 try: TweetID = tweet["id_str"] UserID = str(tweet["user"]["id_str"]) #collecting data text = tweet['text'] #appending tweet to tweets array data = { "UserID": UserID, "TweetID": TweetID, "text": text, "Json": tweet } tweets.append(data)
def _do_ocr_and_lookup(img_obj):
    """OCR a tweet screenshot, extract the @handle, and check the OCR'd text
    against that user's actual timeline.

    Prints a verdict for any real tweet whose word overlap with the OCR'd
    body exceeds 75%.
    """
    limit_of_tweets = int(args.limit)
    # Replace line breaks with a space and split text into an array
    text = pytesseract.image_to_string(img_obj, lang='eng').replace('\n', ' ').split(' ')
    potential_user = None
    for element in text:
        if element and element[0] == '@':
            # Since handles cannot have spaces, strip until space
            potential_user = element.split(' ')[0]
            break
    if potential_user is None:
        # BUG FIX: the original left `potential_user` unbound when no handle
        # was detected and crashed later with a NameError.
        print("Could not detect a Twitter handle in the image.")
        return
    config = configparser.RawConfigParser()
    config.readfp(open('twitter.config'))
    # Just in case, the dude/dudette using the program puts in ' or " in the config.
    consumer_key = config.get('twitter', 'consumer_key').replace('\'', '').replace('\"', '')
    consumer_secret = config.get('twitter', 'consumer_secret').replace(
        '\'', '').replace('\"', '')
    access_token = config.get('twitter', 'access_token').replace('\'', '').replace('\"', '')
    access_token_secret = config.get('twitter', 'access_token_secret').replace(
        '\'', '').replace('\"', '')
    try:
        tuo = TwitterUserOrder(potential_user[1:])  # drop the leading '@'
        ts = TwitterSearch(consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=access_token,
                           access_token_secret=access_token_secret)
        tweets = []
        for tweet in ts.search_tweets_iterable(tuo):
            # Nobody cares about re-tweets
            if 'RT ' not in tweet['text']:
                if tweet not in tweets:
                    tweets.append((tweet['text'], tweet['id']))
                if not limit_of_tweets:
                    break
                else:
                    limit_of_tweets -= 1
        # The OCR output delimits the tweet body with empty tokens.
        body = text[text.index('') + 1:]
        try:
            stripped_body = body[:body.index('')]
        except ValueError:
            # BUG FIX: was a bare `except:`; only a missing '' token
            # (ValueError from list.index) should fall back to the full body.
            stripped_body = body
        for tweet in tweets:
            removed_elements = 0
            ltweet, orig_len = tweet[0].split(' '), len(tweet[0].split(' '))
            for ele in stripped_body:
                if ele in ltweet:
                    removed_elements += 1
                    ltweet.remove(ele)
            removal_rate = (removed_elements / float(orig_len)) * 100
            if removal_rate > 75.0:
                print("*** Tweet is probably real! ***")
                print("-> Confidence : " + str(removal_rate))
                print("-> URL : https://twitter.com/" + potential_user[1:] +
                      "/status/" + str(tweet[1]))
    except TwitterSearchException as e:  # catch all those ugly errors
        print(e)
# --- collect tweets grouped by user and dump them to tweetfile.txt ---
# Relies on a `tso` TwitterSearchOrder configured earlier in the file.
# NOTE(review): Python 2 code (iteritems, str.encode-to-bytes writes).
tso.set_language('en')
tso.set_include_entities(False)
userTweets = {}  # maps "Name(@screen_name)" -> list of utf-8 tweet texts
# NOTE(review): placeholder credentials — must be replaced before running.
ts = TwitterSearch(consumer_key = '[your consumer key]',
                   consumer_secret = '[your consumer secret]',
                   access_token = '[access token]',
                   access_token_secret = '[access token secret]')
outfile = open('tweetfile.txt', 'w')
# BUG FIX: the header was 'User\t\t\Tweets'; the stray backslash made the
# final separator a literal "\T" instead of part of the column label.
outfile.write('User\t\tTweets')
outfile.write('\n.........................................................')
for tweet in ts.search_tweets_iterable(tso):
    username = tweet['user']['name'] + '(@' + tweet['user']['screen_name'] + ')'
    # group tweet texts per user; setdefault replaces the manual
    # lookup/append/store dance of the original
    userTweets.setdefault(username, []).append(tweet['text'].encode('utf8'))
for usrNm, tweets in userTweets.iteritems():
    for tweet in tweets:
        outfile.write("\n" + usrNm.encode('utf8') + "\t\t" + tweet + "\n")
outfile.close()
def gettweets(hashtag):
    """Return a list of up to ~100 German tweet texts for *hashtag*, cached on disk.

    Results are cached under .cache/<hashtag>.json; a fresh API query is only
    made when the cache file is missing or stale (see chachetime()).
    """
    # Ensure the cache directory exists. The original ran a redundant double
    # isdir() check and shelled out via os.system("mkdir ...").
    if not os.path.isdir(".cache/"):
        os.makedirs(".cache", exist_ok=True)
        print("Cache Ordner wurde erstellt!")
    datei = '.cache/{}.json'.format(hashtag)
    if os.path.isfile(datei) and chachetime(datei):  # cache exists and is fresh
        with open(datei) as cachefile:  # read the cached tweet list
            print("Ausgabe aus Cache " + datei)
            # NOTE(review): daysDiff is a module-level value presumably set by
            # chachetime() — confirm it is defined before this line runs.
            print("Cache wurde zuletzt vor " + str(round(daysDiff / 3600, 2)) +
                  " Stunde/n aktualisiert.")
            return json.loads(cachefile.read())
    print("Chache {} wird neu erstellt".format(datei))
    tso = TwitterSearchOrder()       # build the search query
    tso.set_keywords([hashtag])
    tso.set_language('de')           # German tweets only
    tso.set_include_entities(False)  # no entity metadata
    Config = configparser.ConfigParser()
    configfiles = ['config.ini', 'config2.ini', 'config3.ini']
    configfile = random.choice(configfiles)  # spread load across API key sets
    print("Verwende " + configfile)
    if os.path.isfile(configfile):
        Config.read(configfile)
    else:
        print(
            "The config file does not exist, please create a new config with the example file"
        )
        sys.exit()
    consumer_key = Config.get("Twitter API", "consumer_key")
    consumer_secret = Config.get("Twitter API", "consumer_secret")
    access_token = Config.get("Twitter API", "access_token")
    access_token_secret = Config.get("Twitter API", "access_token_secret")
    ts = TwitterSearch(consumer_key=consumer_key,
                       consumer_secret=consumer_secret,
                       access_token=access_token,
                       access_token_secret=access_token_secret)
    tweets = set()  # de-duplicate identical tweet texts
    counter = 0
    for tweet in ts.search_tweets_iterable(tso):
        if counter <= 100:  # cap the download at ~100 tweets
            counter += 1
            tweets.add(tweet['text'])
        else:
            break
    tweets = list(tweets)
    with open(datei, 'w') as cachefile:  # refresh the cache
        cachefile.write(json.dumps(tweets))
    # BUG FIX: the original compared `datei == "jugendhackt.json"`, which can
    # never be true because datei always carries the ".cache/" prefix.
    if hashtag == "jugendhackt":
        # os.system("sudo cp /home/pi/Hashdistribution/backend/.cache/jugendhackt.json /var/www/html/data/jh.json")
        print("Startseiten Tweets aktualisiert!")
    return tweets
def find_polarity(topic):
    """Search Twitter for *topic*, build a DataFrame of non-retweets, compute a
    favorites/retweets-weighted polarity score and write a report into the
    global `label` widget.

    Relies on module-level names: api (tweet client with get_status),
    clean_text, Polarity, rating, pd (pandas) and label.
    """
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([topic])
        tso.set_language('en')
        # NOTE(review): hard-coded API credentials — revoke and move to config.
        ts = TwitterSearch(
            consumer_key='JUBWToPuyPfmzg8n117ZTllfB',
            consumer_secret=
            'lt0Psg46Nqzzaa4uel3wtSbaOyh9WiYIqx6ZH5xaExthndrsc1',
            access_token='1172272055183728640-nLQg9fvsLVieB9BXSsJq86a6kMmR8p',
            access_token_secret='5ogC7PXA1nmlNd5FCYtNaSIhF7tyA5K7CZzNBhi8qIhv1'
        )
        # lists that will become columns in our data frame
        user_list = []
        clean_list = []
        favorited_list = []
        retweet_list = []
        its_a_retweet = []
        count = 0
        for tweet in ts.search_tweets_iterable(tso):
            if count == 50:  # cap the sample at 50 kept tweets
                break
            tw_obj = api.get_status(
                tweet['id_str']
            )  # need the twitter object to check if it's a retweet
            if not hasattr(tw_obj, 'retweeted_status'):  # keep originals only
                user_list.append(tweet['user']['screen_name'])  # user
                cleaned = clean_text(
                    tweet['text']
                )  # removing unnecessary symbols from the tweet's string
                clean_list.append(cleaned)
                favorited_list.append(
                    tweet['favorite_count'])  # num of favorites
                retweet_list.append(tweet['retweet_count'])  # num of retweets
                # always False here, since retweets were filtered out above
                its_a_retweet.append(hasattr(tw_obj, 'retweeted_status'))
                count += 1
        # creating the data frame
        # To make a column, you need a list
        df = pd.DataFrame(user_list)
        df['Tweet Text'] = clean_list
        df['Number of Favorites'] = favorited_list
        df['Number of Retweets'] = retweet_list
        df['Polarity'] = df['Tweet Text'].apply(Polarity)
        # df['Retweet?'] = its_a_retweet
        # print(df)
        # sort data frame by polarity
        sortedDF = df.sort_values(by=['Polarity'],
                                  ignore_index=True,
                                  ascending=False)
        # sort data frame by favorites
        sortedFavDF = df.sort_values(by=['Number of Favorites'],
                                     ignore_index=True,
                                     ascending=False)
        # gathering polarity data from df
        # NOTE(review): despite the name, avg_pol is a weighted *sum*
        # (polarity x favorites + 2x polarity x retweets + polarity per
        # tweet); it is never divided by the number of tweets — confirm.
        pol_count = 0.0
        for i in df.index:
            pol_count += df['Polarity'][i] * df['Number of Favorites'][i] + df[
                'Polarity'][i] * 2.0 * df['Number of Retweets'][i]
            pol_count += df['Polarity'][i]
        avg_pol = pol_count
        print("Average Polarity: ", avg_pol)
        print("Overall Rating: ", rating(avg_pol), '\n')
        label['text'] = "Average Polarity: " + str(
            avg_pol) + '\n' + "Overall Rating: " + rating(avg_pol) + '\n'
        # 5 Most Popular Tweets (top of the favorites-sorted frame)
        print("5 most popular tweets: ")
        label['text'] += '\n' + "5 most popular tweets: " + '\n'
        for i in range(0, 5):
            label['text'] += str(
                i + 1) + ") " + sortedFavDF['Tweet Text'][i] + '\n'
            print(str(i + 1) + ") " + sortedFavDF['Tweet Text'][i])
        # 5 Most Positive Tweets (top of the polarity-sorted frame)
        print("\n5 Most Positive Tweets: ")
        label['text'] += '\n' + "5 most positive tweets: " + '\n'
        for i in range(0, 5):
            label['text'] += str(i + 1) + ") " + sortedDF['Tweet Text'][i] + '\n'
            print(str(i + 1) + ") " + sortedDF['Tweet Text'][i])
        # 5 Most Negative Tweets (walk the polarity-sorted frame backwards)
        print("\n5 Most Negative Tweets: ")
        label['text'] += '\n' + "5 most negative tweets: " + '\n'
        j = 1
        for i in range(df['Tweet Text'].size, df['Tweet Text'].size - 5, -1):
            label['text'] += str(j) + ") " + sortedDF['Tweet Text'][i - 1] + '\n'
            print(str(j) + ") " + sortedDF['Tweet Text'][i - 1])
            j += 1
    except TwitterSearchException as e:
        print(e)
def get_all_tweets(screen_name):
    """Download recent tweets from *screen_name* via tweepy (capped at ~15),
    dump them to tweet.json, then search 'BBC News' + a user-supplied keyword
    and write up to 100 matching tweets to TweetOutput.txt.

    Relies on module-level tweepy credentials (consumer_key, consumer_secret,
    access_key, access_secret).
    """
    # Twitter only allows access to a user's most recent 3240 tweets with this method.
    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)
    # initialize a list to hold all the tweepy Tweets
    alltweets = []
    # initial request for most recent tweets (200 is the API max; 10 used here)
    new_tweets = api.user_timeline(screen_name=screen_name, count=10)
    alltweets.extend(new_tweets)
    oldest = alltweets[-1].id - 1  # id of the oldest tweet, less one
    # keep grabbing tweets until there are none left (capped at 15 below)
    while len(new_tweets) > 0:
        # subsequent requests use max_id to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=10,
                                       max_id=oldest)
        alltweets.extend(new_tweets)
        oldest = alltweets[-1].id - 1
        if len(alltweets) > 15:
            break
        print("...%s tweets downloaded so far" % (len(alltweets)))
    # BUG FIX: the original json.dump()ed each status separately into one
    # file, producing concatenated objects no JSON parser can read back.
    # Dump a single list instead, use a context manager, and stop shadowing
    # the `file` builtin.
    print("Writing tweet objects to JSON please wait...")
    with open('tweet.json', 'w') as json_out:
        json.dump([status._json for status in alltweets], json_out,
                  sort_keys=True, indent=4)
    print("Done")
    for tweet in tweepy.Cursor(api.search, q='BBC').items(20):
        print('Tweet by: @' + tweet.user.screen_name)
    # key word for search in BBC news
    key_word = input("type in the keyword in BBC news to search: \n")
    try:
        tso = TwitterSearchOrder()   # build the search query
        tso.set_keywords(['BBC News', key_word])
        tso.set_language('en')       # only English tweets
        tso.set_include_entities(False)  # no entity metadata
        # NOTE(review): credentials intentionally blank — load them from
        # config/environment; never commit real tokens.
        ts = TwitterSearch(
            consumer_key="",
            consumer_secret="",
            access_token="",
            access_token_secret="")
        print("Writing tweet objects to txt, please wait...")
        number = 0  # count of tweets written so far
        with open('TweetOutput.txt', 'w+') as txt_out:
            for tweet in ts.search_tweets_iterable(tso):
                if number <= 100:
                    txt_out.write('@%s tweeted: %s' %
                                  (tweet['user']['screen_name'], tweet['text']))
                    number += 1
                else:
                    txt_out.write("100 tweets we have now!")
                    print("100 tweets we have now!")
                    break
        print("Done")
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        print(e)
def main(accountName, key1, key2, key3, numberTweets):
    """Search Twitter for key1/key2/key3 and write up to *numberTweets*
    non-retweet results to searchTweetOutput.txt.

    numberTweets may be a string (e.g. from the CLI); it is int()-converted.
    accountName is only used to build a (currently unused) TwitterUserOrder.
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        # NOTE(review): created but never used — kept for interface parity.
        tuo = TwitterUserOrder(accountName)
        tso.set_keywords([key1, key2, key3])  # words to look for
        # tso.set_language('en')  # English-only filter, currently disabled
        tso.set_include_entities(False)  # no entity metadata
        # NOTE(review): placeholder credentials — supply real ones via config.
        ts = TwitterSearch(
            consumer_key="M",
            consumer_secret="M",
            access_token="M",
            access_token_secret="Mine")
        print("Writing tweet objects to txt, please wait...")
        num = 0  # number of tweets written so far
        numberTweet = int(numberTweets)
        # Context manager ensures the file is closed even if iteration raises.
        with open('searchTweetOutput.txt', 'w+') as out:
            for tweet in ts.search_tweets_iterable(tso):
                if num < numberTweet:
                    # exclude retweets (both flagged and manual "RT @" ones)
                    if (not tweet['retweeted']) and ('RT @' not in tweet['text']):
                        out.write('%s tweeted: %s . This tweet ends. \n' %
                                  (tweet['user']['screen_name'], tweet['text']))
                        num += 1
                else:
                    # BUG FIX: str() guards against numberTweets arriving as
                    # an int, where the original `numberTweets + " tweets..."`
                    # concatenation raised TypeError.
                    result = str(numberTweets) + " tweets we have now."
                    out.write(result)
                    print(numberTweets, " tweets we have now")
                    break
        print("Done")
    except TwitterSearchException as e:
        print(e)
def _do_ocr_and_lookup(img_obj):
    """OCR a tweet screenshot, find the @handle, pull that user's timeline and
    report how closely any real tweet matches the OCR'd text.

    Verdicts: green for >75% word overlap, yellow for 55-75%.
    """
    config = get_config()
    limit_of_tweets = int(args.limit)
    # Sentinel used to detect OCR failure below.
    # BUG FIX: the original initialized this to '******' but compared against
    # '__fakemenot__', so the OCR-failure branch could never trigger.
    potential_user = '__fakemenot__'
    # Replace line breaks with a space and split text into an array
    text = pytesseract.image_to_string(
        img_obj, lang='eng').replace(
        '\n', ' ').split(' ')
    for element in text:
        if element and element[0] == '@':
            print("Detected handle : " + str(element))
            # Since handles cannot have spaces, strip until space
            potential_user = element.split(' ')[0]
            break
    # Just in case the person Yousing the program puts in ' or " in the config.
    consumer_key = config.get(
        'twitter', 'consumer_key').replace(
        '\'', '').replace(
        '\"', '')
    consumer_secret = config.get(
        'twitter', 'consumer_secret').replace(
        '\'', '').replace(
        '\"', '')
    access_token = config.get(
        'twitter', 'access_token').replace(
        '\'', '').replace(
        '\"', '')
    access_token_secret = config.get(
        'twitter', 'access_token_secret').replace(
        '\'', '').replace(
        '\"', '')
    if potential_user == '__fakemenot__':
        print(colored("[*] It looks like OCR failed. Please make sure you " +
                      "crop the image as in sample and is readable.", 'red'))
        exit(1)
    try:
        tuo = TwitterUserOrder(potential_user[1:])  # drop the leading '@'
        ts = TwitterSearch(
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            access_token=access_token,
            access_token_secret=access_token_secret
        )
        tweets = []
        body = '__awesomebody__'  # sentinel; see OCR-failure check below
        for tweet in ts.search_tweets_iterable(tuo):
            # Nobody cares about re-tweets
            if 'RT ' not in tweet['text']:
                if tweet not in tweets:
                    tweets.append((tweet['text'], tweet['id']))
                if not limit_of_tweets:
                    break
                else:
                    limit_of_tweets -= 1
        # The most probable tweet body starts after the 'V' marker token.
        try:
            body = text[text.index('V') + 1:]
        except ValueError:
            body = text
        # NOTE(review): `body` is always reassigned above, so this sentinel
        # check is unreachable as written — confirm the intended condition.
        if body == '__awesomebody__':
            print(colored("[*] It looks like OCR failed.Please make sure you " +
                          "crop image as in sample and is readable.", 'red'))
        found_tweet = False
        # Check against every tweet pulled
        for tweet in tweets:
            removed_elements = 0
            ltweet, orig_len = tweet[0].split(' '), len(tweet[0].split(' '))
            # Compare each element of body to element in body. TODO: Optimize
            for ele in body:
                if ele in ltweet:
                    removed_elements += 1
                    ltweet.remove(ele)
            removal_rate = (removed_elements / float(orig_len)) * 100
            if int(removal_rate) > 75:
                found_tweet = True
                print(colored("[*] It looks like this is a valid tweet", 'green'))
                print(colored("-> Confidence : " + "%.2f" % removal_rate + "%",
                              'green'))
                print(colored("-> Potential URL : https://twitter.com/" +
                              potential_user[1:] + "/status/" + str(tweet[1]),
                              'green'))
            elif 55 <= int(removal_rate) <= 75:
                # BUG FIX: was `in (55, 75)`, which matched only those two
                # exact values instead of the intended 55-75 confidence band.
                found_tweet = True
                print(colored("[*] This might be a valid tweet", 'yellow'))
                print(colored("-> Confidence : " + "%.2f" % removal_rate + "%",
                              'yellow'))
                print(colored("-> Potential URL : https://twitter.com/" +
                              potential_user[1:] + "/status/" + str(tweet[1]),
                              'yellow'))
        if not found_tweet:
            print(colored("[*] I couldn't find a tweet like that. " +
                          "Try increasing the limit to pull more tweets",
                          'yellow'))
    except TwitterSearchException as e:  # catch all those ugly errors
        print(e)
def getTweets(query):
    """Fetch up to 1000 English tweets about *query* (or read the sample file),
    strip leading RT/@mention prefixes, then generate a four-line poem.

    Relies on module-level names: sample, file, CMU, Process, SCD,
    sent_tokenize.
    """
    #pp = pprint.PrettyPrinter(indent=4)
    try:
        if not sample:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            print(query)
            tso.set_keywords([query])   # words to look for
            tso.set_language('en')      # English tweets only
            tso.set_include_entities(False)  # no entity metadata
            # NOTE(review): hard-coded API credentials — revoke and move
            # to configuration.
            ts = TwitterSearch(
                consumer_key='SwtLcZe9Im6q998K4cJqANs4n',
                consumer_secret=
                '7PMRM3ec7ltINPVl72FXurMn8Qg9HrS1NKwocYJVlTGngEFbEA',
                access_token=
                '51466054-cJUBESD4H9THIQExiKQ1HOGdR0GflXdyeIeL0TfKw',
                access_token_secret=
                'nn3ESWtluVoLSNFexAKcEesF6rEg0lTJ4QaIbFHJACFDr')
        count = 1000  # how many tweets we want: as many as possible without hurting load time
        i = 0
        tweet_list = []
        if sample:
            print("Reading Sample File")
            for line in file.read().split('\n'):
                tweet_list.append(line)
        else:
            print("Searching....")
            # compiled once, outside the loop
            prefix_re = re.compile(r"(((RT )?@(\w)*) ?:? )?")
            for tweet in ts.search_tweets_iterable(tso):
                if i >= count:
                    break  # stop getting tweets when we have enough
                words = tweet['text']
                # BUG FIX: the original called words.lstrip(match.group(0)),
                # but lstrip strips a *character set*, not the matched prefix,
                # and could eat arbitrary leading characters of the tweet.
                # Slice off the matched "RT @user: " prefix instead.
                match = prefix_re.match(words)
                if match:
                    words = words[match.end():]
                tweet_list.append(words)
                i += 1
        # if we have less than 1000 tweets, the corpus is too short.
        if (len(tweet_list) < 1000):
            print(
                "Sorry! Your search did not return enough results, please try another."
            )
            return
        print("Search complete!")
        print("Tagging...")
        tagged = CMU.runtagger_parse(sent_tokenize(
            "\n".join(tweet_list)))  #tweetset))
        print("Tagging complete!")
        print("Analyzing tags...")
        tag_table = Process.create_rules(tagged)
        syl_rules = Process.get_pos_syllables(tagged)
        rhyme_pos_table = SCD.rhyme_to_POS(tagged)
        print("Analysis Complete!")
        print("Generating poetry...")
        result1 = Process.generate_firsttwo(tag_table, syl_rules)
        r1 = result1[1]
        r2 = result1[2]
        firsttwo = result1[0]
        result2 = Process.generate_lasttwo(tag_table, syl_rules,
                                           rhyme_pos_table, r1, r2)
        lasttwo = result2
        print("A poem about " + query + ":")
        print()
        print(firsttwo)
        print(lasttwo)
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        print(e)
def post(self):
    """Handle the results form: look up YouTube videos, recent tweets and an
    Instagram profile image for the submitted map_name, then render
    results2.html.

    NOTE(review): several API keys/tokens are hard-coded below — they should
    be revoked and loaded from configuration instead.
    """
    # --- YouTube: top 6 relevant, safe-search videos for the query ---
    api_key = 'AIzaSyBOrP8QroOlqPw0bdOFjhXnEKB0ITXRX4o'
    search_name = self.request.get("map_name")
    search_name = search_name.replace(" ", "_")  # YouTube query uses underscores
    endpoint_url = "https://www.googleapis.com/youtube/v3/search?part=snippet&maxResults=6&order=relevance&q=" + search_name + "&safeSearch=strict&type=video&key=" + api_key
    response = urlfetch.fetch(endpoint_url)
    content = response.content
    response_as_json1 = json.loads(content)
    print(response_as_json1)
    list_url = []
    for thing in response_as_json1["items"]:
        list_url.append(thing["id"]["videoId"])
    # --- Twitter: up to 6 recent tweets from the user's timeline ---
    name = self.request.get("map_name")
    tuo = TwitterUserOrder(name)
    ts = TwitterSearch(
        consumer_key='dKu6bH3B6kzjjQx8SQOZix1zm',
        consumer_secret=
        '0JeXDLbdGApPxoGc7X3KKDHfLSfz9nLtrfcnRNvMCQwW3MVYG1',
        access_token='3659983230-oAz1ASVWPfp9tA6rMKmLUy9KIbt01WEvqCwwf6z',
        access_token_secret='s4S30z2lb7TNy6UqULkfSnz1lJiAxvlaDTyECjFfIq27Z'
    )
    i = 6  # countdown of tweets still wanted
    tsts = []
    if i > 0:
        for tweet in ts.search_tweets_iterable(tuo):
            result = tweet['user']['screen_name'] + ": " + tweet['text']
            tsts.append(result)
            print(tsts)
            i = i - 1
            if i == 0:
                break
    print(i)
    # --- Instagram (via stevesie proxy API): fetch the profile image URL ---
    r = requests.post(
        'https://stevesie.com/cloud/api/v1/endpoints/3cd58c09-c547-481e-a011-180097f61f49/executions',
        headers={
            'Token': '04e4dc3c-481c-462f-875d-4e8202874ec7',
        },
        json={
            'inputs': {
                'session_id': '2229053416%3AERftJLIFsesnIt%3A5',
                'username': name,
                'max_id': '',
            },
            'proxy': {
                'type': 'shared',
                'location': 'nyc3',
            }
        },
    )
    response_json = r.json()
    img_url = response_json['object']['response']['response_text']
    img_url = str(img_url)
    # NOTE(review): this replace() swaps '"' for '"' — a no-op as written.
    json_acceptable_string = img_url.replace('''"''', "\"")
    url = json.loads(json_acceptable_string)
    url = url['items'][0]['image_versions2']['candidates'][0]['url']
    template_var = {
        'tsts': tsts,
        'url': url,
        'list_url': list_url,
    }
    result_template = the_jinja_env.get_template('templates/results2.html')
    self.response.write(result_template.render(template_var))
non_sarcastic_tweets_list = [] # create search orders if args.sarcastic_path: sarcastic_tso = create_sarcastic_search_order() if args.non_sarcastic_path: non_sarcastic_tso = create_non_sarcastic_search_order() try: # query twitter API and populate tweet lists ts = TwitterSearch(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET, access_token=ACCESS_TOKEN, access_token_secret=ACCESS_SECRET) if args.sarcastic_path: for sarcastic_tweet in ts.search_tweets_iterable(sarcastic_tso): if not sarcastic_tweet['full_text'].lower().startswith('rt'): sarcastic_tweets_list.append({ 'id': sarcastic_tweet['id'], 'urls': not not sarcastic_tweet['entities']['urls'], 'media': "media" in sarcastic_tweet["entities"], 'text': sarcastic_tweet['full_text'] }) if args.non_sarcastic_path: for non_sarcastic_tweet in ts.search_tweets_iterable( non_sarcastic_tso): if not non_sarcastic_tweet['full_text'].lower().startswith(
class TweetCrawler:
    """ A class for tweet retrieval

    This class can be used to retrieve tweets around Cork or with relevant keywords
    The flag -g specifies, that tweets in a 20 mile radius around Fort Meagher are searched
    The flag -k specifies, that tweets with the keywords in CORK_KEYWORDS are searched
    """
    # Twitter credentials are loaded from the .env file, never hard-coded.
    load_dotenv('.env')
    TWITTER_CONSUMER_KEY = os.getenv('TWITTER_CONSUMER_KEY')
    TWITTER_CONSUMER_SECRET = os.getenv('TWITTER_CONSUMER_SECRET')
    TWITTER_ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
    TWITTER_ACCESS_TOKEN_SECRET = os.getenv('TWITTER_ACCESS_TOKEN_SECRET')

    # Per-city search configuration: OR-combined keywords, [lat, lng] geocode,
    # and a display name used in output filenames.
    ANTALYA = {'keywords': ['Düden', '#Düden', '@antalyabb', 'from:antalyabb',
                            'to:antalyabb', 'Antalya', '#Antalya'],
               'geocode': [36.852569, 30.782124],
               'name': 'Antalya'}
    ANTWERPEN = {'keywords': ['Antwerpen', '#Antwerpen', '@Stad_Antwerpen',
                              'to:Stad_Antwerpen', '@StadsLab2050', 'to:StadsLab2050',
                              '@DgplsAntwerpen', 'to:DgplsAntwerpen', '@DeZomer',
                              'to:DeZomer', '@LPAntwerpen', 'to:LPAntwerpen',
                              '@PortofAntwerp', 'to:PortofAntwerp', '@SlimnaarA',
                              'to:SlimnaarA', '@BZAntwerpen', 'to:BZAntwerpen',
                              '@BusinessInA', 'to:BusinessInA'],
                 'geocode': [51.258820, 4.355700],
                 'name': 'Antwerpen'}
    CORK = {'keywords': ['Fort Meagher', 'Cork', 'Crosshaven', 'Camden Fort',
                         '#Cork', '#FortMeagher', '#Crosshaven', '#lovecork',
                         '#purecork', '#visitCork', '@Corkcoco', 'from:Corkcoco',
                         'to:Corkcoco'],
            'geocode': [51.809083, -8.279279],
            'name': 'Cork'}
    THESSALONIKI = {'keywords': ['#Thessaloniki', 'Thessaloniki', '#Thermaikos',
                                 'Thermaikos', '@ThessalonikCity',
                                 'to:ThessalonikCity', '@AtThessaloniki'],
                    'geocode': [40.563893, 23.024136],
                    'name': 'Thessaloniki'}
    CITIES = [ANTALYA, ANTWERPEN, CORK, THESSALONIKI]

    def __init__(self):
        """ Ensures, that the necessary access settings are set and creates
        TwitterSearchOrder instance for further search options """
        self.ts = TwitterSearch(
            consumer_key=self.TWITTER_CONSUMER_KEY,
            consumer_secret=self.TWITTER_CONSUMER_SECRET,
            access_token=self.TWITTER_ACCESS_TOKEN,
            access_token_secret=self.TWITTER_ACCESS_TOKEN_SECRET
        )
        self.tso = TwitterSearchOrder()

    def get_by_location(self, city):
        """ Retrieves tweets in a 20 mile radius around the city's geocode

        :param: Dict
        :return: None
        """
        # keywords serve as a placeholder, as empty keywords are not allowed in
        # TwitterSearch
        self.tso.set_keywords([" ", ".", ","], or_operator=True)
        self.tso.set_geocode(
            city['geocode'][0],
            city['geocode'][1],
            20,
            imperial_metric=True  # radius in miles
        )
        self.get_tweets(city)

    def get_by_keywords(self, city):
        """ Retrieves tweets according to the city's keyword list

        :return: None
        """
        self.tso.set_keywords(city['keywords'], or_operator=True)
        self.get_tweets(city)

    def get_tweets(self, city):
        """ Retrieves and saves tweets in a file. The options used for
        retrieval are saved in the tso object of the TweetCrawler instance

        :return: None
        """
        output_dir = 'outputs'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
        # NOTE(review): ':' in the timestamp makes these filenames invalid on
        # Windows — confirm the deployment target before changing the format.
        # file for the shortened csv
        filename = 'Tweets_' + city['name'] + '_' + \
            datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '.csv'
        # file for the json containing the entire retrieved information
        filename_raw = 'Tweets_' + city['name'] + '_' + \
            datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '.json'
        try:
            self.tso.set_language('en')
            # this option ensures, that tweets are not truncated
            self.tso.arguments.update({'tweet_mode': 'extended'})
            # self.tso.set_include_entities(False)  # provides entity information
            with open(os.path.join(output_dir, filename), 'w') as f:
                tweets = []
                csvWriter = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
                csvWriter.writerow(['user', 'tweet'])
                for tweet in self.ts.search_tweets_iterable(self.tso):
                    csvWriter.writerow([
                        tweet['user']['screen_name'].encode('utf-8'),
                        tweet['full_text'].encode('utf-8')
                    ])
                    # this is more of a workaround as using the noniterable
                    # option of TwitterSearch 'search_tweets(tso)' caps the
                    # amount of retrieved tweets
                    tweets.append(tweet)
                # writes the total number of retrieved tweets at the end
                csvWriter.writerow(['tweets', len(tweets)])
            print("Retrieved Tweets in: " + output_dir + "/" + filename)
            # writes the entire data collected into a .json file
            with open(os.path.join(output_dir, filename_raw), 'w') as f_raw:
                json.dump(tweets, f_raw)
        except TwitterSearchException as e:
            print(e)
        except Exception:
            # BUG FIX: the original concatenated the exception *type* object
            # to a str ('Unexpected error: ' + sys.exc_info()[0]), which
            # itself raises TypeError; also avoid a bare `except:`.
            print('Unexpected error: ' + str(sys.exc_info()[0]))
#PEGANDO DADOS DO TWITTER try: ts = TwitterSearch( consumer_key='mgpSEJ1Fu9le1ND0iulsoWHaM', consumer_secret= 'HMvPmooVtTmiJkfwixOORifcUZz1C442AbYz2Nodg8k0kFKjjP', access_token='1086082843476938753-6ax1DxzwPMrMfqmganAcnLJ31amKrI', access_token_secret='O7IT1RuvhO5KhhAF7Vf5uI5TekF3VaqrYjjosnH3nYwQ2' ) tso = TwitterSearchOrder() tso.set_keywords([word]) tso.set_language('pt') result = ts.search_tweets_iterable(tso) #GUARDANDO OS DADOS cont = 0 for tweet in result: writer.writerow({ 'name': str(tweet['user']['name']).translate(non_bmp_map), 'screen_name': str(tweet['user']['screen_name']).translate(non_bmp_map), 'location': str(tweet['user']['location']).translate(non_bmp_map), 'followers_count': str(tweet['user']['followers_count']).translate(non_bmp_map), 'friends_count': str(tweet['user']['friends_count']).translate(non_bmp_map),
def searchTweets(keywordLists=None,
                 keywords=None,
                 language=None,
                 geo_lat=None,
                 geo_lng=None,
                 geo_rad=None,
                 timeStart=None,
                 timeStop=None,
                 no_entities=False,
                 no_retweets=False,
                 no_links=False,
                 no_answers=False):
    """Run a configurable Twitter search, append every hit to `outputfile`
    (raw text) and `outputgeo` (geoJSON), and return the number of tweets
    found.

    Keyword groups are OR-combined; timeStart/timeStop must be 'YYYY-MM-DD'.
    Relies on module-level credentials, the outputfile/outputgeo paths, and
    the format2geoJSON / my_callback helpers.
    """
    tweetsFound = []  # kept for parity with the original (never filled)
    tweetsCount = 0
    # NOTE(review): the original first configured a TwitterSearchOrder with
    # negative keywords ("-video", "-pic", ...) and then discarded it by
    # creating a fresh one inside the try block — those filters were never
    # applied. That behavior is preserved here; confirm whether the filters
    # were meant to be merged into the real query.
    try:
        tso = TwitterSearchOrder()
        if keywordLists is not None:
            for keywordList in keywordLists:
                tso.add_keyword(keywordList, or_operator=True)
        if keywords is not None:
            for keyword in keywords:
                tso.add_keyword(keyword, or_operator=True)
        if language is not None:
            tso.set_language(str(language))
        if geo_rad is not None and geo_lat is not None and geo_lng is not None:
            # must be of format: str(lat,lng,radius) + 'km'/'mi'
            tso.set_geocode(geo_lat, geo_lng, geo_rad, imperial_metric=True)
        if timeStart is not None:
            tso.add_keyword('since:' + str(timeStart))  # format: YYYY-MM-DD
        if timeStop is not None:
            tso.add_keyword('until:' + str(timeStop))   # format: YYYY-MM-DD
        if no_entities:
            tso.set_include_entities(False)
        if no_retweets:
            pass  # tso.set_include_rts(False)  TODO
        if no_links:
            pass  # TODO
        if no_answers:
            pass  # tso.set_exclude_replies(True)  TODO
        # Maybe use sentiment analysis?
        # BUG FIX: the next line used a C-style `//` comment in the original,
        # which is a SyntaxError in Python:
        # tso.set_negative_attitude_filter()
        ts = TwitterSearch(consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=access_token,
                           access_token_secret=access_token_secret)
        for tweet in ts.search_tweets_iterable(tso, callback=my_callback):
            tweetsCount += 1
            # append the raw tweet to the .txt log; `with` closes the file,
            # so the original's explicit close() calls were redundant
            with open(outputfile, 'a+') as outP:
                outP.write(str(tweet))
                outP.write('\n')
            # convert and append as geoJSON
            with open(outputgeo, 'a+') as outPgeo:
                outPgeo.write(format2geoJSON(tweet))
            print('@%s tweeted: %s\n' %
                  (tweet['user']['screen_name'], tweet['text']))
    except TwitterSearchException as e:
        print(e)
    except requests.exceptions.SSLError as e:
        print(e)
    return tweetsCount
# Flask view: on POST redirect to itself with the submitted ticker; on GET
# fetch the closing price from finnhub, count the ticker's tweets for today
# (re-running the search with negative/positive attitude filters for a crude
# sentiment split), pull Reddit mentions/upvote ratio via praw, cache the
# results in the module-level `saved_tickers` dict, and render ticker.html.
# NOTE(review): the 10-day `datapoints` loop indexes
# saved_tickers[ticker][new_date] although only `today` is ever stored --
# this looks like it raises KeyError for past dates; confirm with callers.
# NOTE(review): the finnhub API token and the Reddit credentials are
# hard-coded in source -- they should live in configuration/secrets.
def get_twitter_data(ticker): if request.method == "POST": ticker = request.form["ticker"] return redirect(url_for("get_twitter_data", ticker=ticker)) ############################################################################ # Getting stock data import requests r = requests.get('https://finnhub.io/api/v1/quote?symbol=' + ticker + '&token=c0fjign48v6snribcmh0') print(r.json()) closing_price = r.json()['c'] ############################################################################ ts = TwitterSearch(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET) search_obj = TwitterSearchOrder() search_obj.set_keywords([ticker, '$' + ticker, ticker + '$']) search_obj.set_language('en') search_obj.set_include_entities(False) search_obj.set_since(yesterday) search_obj.set_until(today) if ticker in saved_tickers: print(f'checking twitter for tweets of {ticker} (saved) ...') count_tweets = saved_tickers[ticker][today][0] percentage_sentiment = saved_tickers[ticker][today][1] sentiment = saved_tickers[ticker][today][2] reddit_mentions = saved_tickers[ticker][today][3] upvote_ratio = saved_tickers[ticker][today][4] else: count_tweets = 0 print('checking twitter for tweets of {} ...'.format(ticker)) for _ in ts.search_tweets_iterable(search_obj): count_tweets += 1 print('there were {} tweets of {} today'.format(count_tweets, ticker)) search_obj.set_negative_attitude_filter() count_negative = 0 for _ in ts.search_tweets_iterable(search_obj): count_negative += 1 print('according to twitter, there were {} negative tweets of {} today'.format(count_negative, ticker)) search_obj.set_positive_attitude_filter() count_positive = 0 for _ in ts.search_tweets_iterable(search_obj): count_positive += 1 sum_of_sentiment = count_negative + count_positive if sum_of_sentiment != 0: percentage_sentiment = count_positive / sum_of_sentiment * 100 if percentage_sentiment > 50: percentage_sentiment = str(int(percentage_sentiment)) + "%" sentiment = "positive" else: percentage_sentiment 
= str(int(percentage_sentiment)) + "%" sentiment = "negative" else: percentage_sentiment = "0%" sentiment = "positive" ############################################################################ reddit = praw.Reddit(client_id='fR7cRCGQceQ3DQ', client_secret='NFFZcftp18b64hnADKBlMsJ3n1eFEw', user_agent='Stonks', username='******', password='******') subreddits = ['stocks', 'wallstreetbets', ] subreddits = [reddit.subreddit(subreddits[i]) for i in range(len(subreddits))] reddit_mentions = search_ticker_mentions(ticker, subreddits, limit=200) upvote_ratio = search_ticker_upvotes(ticker, subreddits, limit=200) saved_tickers[ticker] = {today: [count_tweets, percentage_sentiment, sentiment, reddit_mentions, upvote_ratio]} print('according to twitter, there were {} positive tweets of {} today'.format(count_positive, ticker)) datapoints = [(today, saved_tickers[ticker][today][0], saved_tickers[ticker][today][3])] for i in range(1, 11): new_date = today - timedelta(i) datapoints.append((new_date, saved_tickers[ticker][new_date][0], saved_tickers[ticker][new_date][3])) print(datapoints) print("length of datapoints = " + str(len(datapoints))) ############################################################################ # TODO: add upvote ratio to reddit info print(saved_tickers) return render_template("ticker.html", ticker=ticker, count_tweets=count_tweets, percentage_sentiment=percentage_sentiment, sentiment=sentiment, reddit_mentions=reddit_mentions, upvote_ratio=upvote_ratio, closing_price=closing_price, datapoints=datapoints)
def go_to_sleep(current_ts_instance):
    """Rate-limit callback for TwitterSearch: pause between query batches.

    `sleep_time` is a module-level setting defined elsewhere in this file.
    """
    time.sleep(sleep_time)


# Collect up to lang[ln] tweets per language, capping how many retweets are
# accepted.  NOTE(review): `lang`, `keywords`, `type_name`, `current_rt` and
# `max_rt` are module-level names defined outside this chunk -- confirm they
# are initialised before this loop runs.
for ln in lang:
    print(" working for language {0}".format(ln))
    result = []
    max_count = int(lang[ln])
    current_count = 0
    ts = TwitterSearch(consumer_key='key',
                       consumer_secret='key',
                       access_token='key',
                       access_token_secret='key')
    tso = TwitterSearchOrder()
    tso.set_keywords(keywords, or_operator=True)
    tso.set_language(ln)
    tso.set_include_entities(True)
    try:
        for tweet in ts.search_tweets_iterable(tso, callback=go_to_sleep):
            if "RT @" in tweet['text'] and current_rt <= max_rt:
                # retweet, still under the retweet cap
                current_count += 1
                current_rt += 1
                result.append(tweet)
            elif "RT @" not in tweet['text']:
                # original tweet -- always accepted
                current_count += 1
                result.append(tweet)
            if current_count >= max_count:
                break
    except TwitterSearchException as e:
        print(e)
    # BUGFIX: the original opened this file without ever closing it (and
    # shadowed the py2 builtin name `file`); `with` closes it reliably.
    with open("data/{0}_{1}.json".format(type_name, ln), "w") as out_fp:
        out_fp.write(json.dumps(result))
def main():
    """Search Twitter for the keywords listed in ./keywords and e-mail any
    new tweets (ids not present in ./previous) that contain at least one URL
    and pass validate_tweet().  Every tweet id seen in this run is written
    back to ./previous so it is skipped next time.

    Credentials and SMTP settings come from read_conf('conf').
    """
    try:
        tso = TwitterSearchOrder()
        # BUGFIX: the keyword file handle was never closed in the original
        with open('keywords', 'r') as kwFile:
            keywords = kwFile.readlines()
        tso.set_keywords([s.replace('\n', '') for s in keywords])

        conf = read_conf('conf')
        key = conf['key']
        secret = conf['secret']
        token = conf['token']
        token_secret = conf['token_secret']
        ts = TwitterSearch(consumer_key=key,
                           consumer_secret=secret,
                           access_token=token,
                           access_token_secret=token_secret)

        # get previous request ids from file --DIRTY--
        # BUGFIX: the original read `if !os.path.exists(...)` -- `!` is C
        # syntax and a SyntaxError in Python; it must be `not`.
        if not os.path.exists('previous'):
            open('previous', 'a').close()
        with open('previous', 'r') as previous:
            prev = dict()
            for i in [s.replace('\n', '') for s in previous.readlines()]:
                prev[i] = i

        # re-open for writing: the file is truncated and re-filled with the
        # ids seen in this run
        prev_write = open('previous', 'w')
        mail = conf['mail']
        pwd = conf['pwd']
        s = smtplib.SMTP(conf['smtp_host'], int(conf['smtp_port']))
        s.starttls()
        s.login(mail, pwd)
        mail_text = ''
        for tweet in ts.search_tweets_iterable(tso):
            prev_write.write(str(tweet['id']) + '\n')
            if not str(tweet['id']) in prev:
                if len(tweet['entities']['urls']) > 0 and validate_tweet(tweet['text']):
                    mail_text = mail_text + '\n\nTWEET AT ' + tweet['created_at']
                    mail_text = mail_text + tweet['text']
        prev_write.close()
        if len(mail_text) > 0:
            msg = MIMEText(mail_text.encode('utf-8'), 'plain', 'utf-8')
            msg['Subject'] = 'Latest #ICanHazPdf requests...'
            msg['From'] = mail
            msg['To'] = mail
            s.sendmail(mail, mail, msg.as_string())
        s.close()
    except TwitterSearchException as e:
        print(e)
from TwitterSearch import *

try:
    # Build the search order first: Portuguese tweets mentioning "chatbot".
    pesquisa = TwitterSearchOrder()
    pesquisa.set_keywords(['chatbot'])
    pesquisa.set_language('pt')

    # API client (keys redacted in source).
    tweets = TwitterSearch(consumer_key='XXXXXXXXX',
                           consumer_secret='XXXXX',
                           access_token='XXXXXXX',
                           access_token_secret='XXXXXXX')

    # Print author and text for every match.
    for tweet in tweets.search_tweets_iterable(pesquisa):
        print('@{0} tweeted: {1}'.format(tweet['user']['screen_name'],
                                         tweet['text']))
except TwitterSearchException as e:
    print(e)
class TweepyBot:
    """Finds potential influencers via tweepy + TwitterSearch and dumps the
    analysed tweet data to `data_file` (JSON) for later ML / language
    processing.

    Module-level globals used: data_file, auth_data, likes_weight,
    retweet_weight, follower_threshold, and (apparently) a free function
    get_tweets(api, user).
    """

    # init arrays and create api
    def __init__(self):
        self.potential_influencers = []
        self.twdata = {}
        with open(data_file, 'w') as f:
            f.write(json.dumps(self.twdata))
        self.auth = tweepy.OAuthHandler(auth_data['tw_auth']['api_key'],
                                        auth_data['tw_auth']['api_secret'])
        self.auth.set_access_token(auth_data['tw_auth']['access_token'],
                                   auth_data['tw_auth']['access_secret'])
        self.api = tweepy.API(self.auth)
        self.tso = TwitterSearchOrder()
        self.tso.set_language("en")
        self.tso.set_include_entities(False)
        self.ts = TwitterSearch(
            consumer_key=auth_data['tw_auth']['api_key'],
            consumer_secret=auth_data['tw_auth']['api_secret'],
            access_token=auth_data['tw_auth']['access_token'],
            access_token_secret=auth_data['tw_auth']['access_secret'])

    # updates the engagement ratio
    # also dumps the text of all tweets analyzed to a json file for ML and
    # language processing
    def set_engagement_ratio(self, user):
        """Return the mean engagement score of `user`'s tweets, recording
        each previously-unseen tweet in self.twdata."""
        # NOTE(review): this calls a module-level get_tweets(api, user) --
        # not self.get_tweets(user), whose signature and return type differ.
        # The objects returned here must expose .user/.id/.text; confirm
        # against that helper.
        tweets = get_tweets(self.api, user)
        # BUGFIX: json.load(open(...)) leaked the file handle
        with open(data_file) as f:
            self.twdata = json.load(f)
        count = 0
        engagement_scores = []
        # BUGFIX: the original tested `t.id in self.twdata.values()`, but the
        # values are per-tweet dicts, so the test never matched and nothing
        # was deduplicated.  Compare against the stored ids instead.
        known_ids = {v['id'] for v in self.twdata.values()}
        for t in tweets:
            # NOTE(review): precedence kept as written -- only the retweet
            # term is divided by followers_count; confirm this is intended.
            engagement_scores.append(
                (likes_weight * t.user.favorite_count) +
                (retweet_weight * t.retweet_count) / t.user.followers_count)
            if t.id not in known_ids:
                self.twdata[count] = {
                    'id': t.id,
                    'text': t.text,
                    'engagement_score': engagement_scores[-1]
                }
                known_ids.add(t.id)
                count += 1
        # BUGFIX: guard the empty timeline -- the original raised
        # ZeroDivisionError when no tweets came back.
        if not engagement_scores:
            return 0.0
        return sum(engagement_scores) / len(engagement_scores)

    def get_tweets(self, user):
        """Return the text of up to 100 recent tweets by `user`."""
        statuses = self.api.user_timeline(screen_name=user, count=100)
        ret = []
        for status in statuses:
            ret.append(status._json['text'])
        return ret

    # searches tweets with a given str array of keywords
    # also dumps tweet data to mldata/twdata.json
    def search(self, keywords):
        """Search tweets matching `keywords`; collect authors whose follower
        count exceeds follower_threshold and dump tweet data to data_file."""
        self.tso.set_keywords(keywords)
        # BUGFIX: close the file handle (was json.load(open(...)))
        with open(data_file) as f:
            self.twdata = json.load(f)
        count = 0
        # BUGFIX: same broken dedupe as set_engagement_ratio -- compare ids
        known_ids = {v['id'] for v in self.twdata.values()}
        for tweet in self.ts.search_tweets_iterable(self.tso):
            if (tweet['user']['followers_count'] > follower_threshold):
                self.potential_influencers.append(tweet['user']['screen_name'])
            if tweet['id'] not in known_ids:
                self.twdata[count] = {
                    'id': tweet['id'],
                    'text': tweet['text'],
                    'engagement_score':
                        ((likes_weight * tweet['favorite_count']) +
                         (retweet_weight * tweet['retweet_count']) /
                         tweet['user']['followers_count'])
                }
                known_ids.add(tweet['id'])
                count += 1
        # BUGFIX: the original sorted with key=lambda item: item[1], which
        # compares dicts and raises TypeError on Python 3; sort by the
        # engagement score, as the trailing comment intended.
        self.twdata = {
            k: v
            for k, v in sorted(self.twdata.items(),
                               key=lambda item: item[1]['engagement_score'])
        }  # sorts by engagement
        with open(data_file, 'w') as f:
            f.write(json.dumps(self.twdata))
        self.twdata.clear()  # for ram
def analysis_keyword(request, keyword):
    """Django view: fetch up to 200 English tweets for `keyword`, run the
    sentiment tagger over them, bucket the tagged tweets into a 20-slot
    timeline, and render the result page (or the error page when the
    Twitter search fails)."""
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0] * time_slot
        neg_timeline = [0] * time_slot
        time_timeline = []

        # search order: English tweets, 100 per page, no entities
        tso = TwitterSearchOrder()
        tso.set_keywords([keyword])
        tso.set_language('en')
        tso.set_count(100)
        tso.set_include_entities(False)

        count = 200
        i = 0
        start_time = datetime.max
        end_time = datetime.min
        ts = TwitterSearch(
            consumer_key='aUjZ7NR0b87m7lvC7NNFxmlQi',
            consumer_secret=
            'vCNYJLewRPhMrQ6q6x1B7vJcCq1PkdOywhS7ajCY5xu9vm0u5Z',
            access_token='2940098420-wvLU4OftzQmtMjqN5NLBt4lL5kMUF5ubx6K1Oli',
            access_token_secret='4xWNY899n4JTVDKPFBEWSB2uzMI72gVF6weXqFX1xu3ID'
        )

        # fetch -- stop once `count` tweets have been pulled
        for tweet in ts.search_tweets_iterable(tso):
            raw_tweets.append([
                tweet['text'],
                tweet['user']['screen_name'],
                tweet['created_at'],
                tweet['user']['location'],
            ])
            if i >= count - 1:
                break
            i += 1

        # tagging -- keep only tweets the tagger classifies as non-neutral
        for raw in raw_tweets:
            tag, pos_value, neg_value = tagger(raw[0])
            if tag != 0:
                parsed = time.strptime(raw[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(parsed))
                format_tweets.append(
                    [raw[0], raw[1], dt, tag, pos_value, neg_value])

        # statistics
        negative = sum(1 for tw in format_tweets if tw[3] == -1)

        # generate timeline data
        for tw in format_tweets:
            start_time = min(start_time, tw[2])
            end_time = max(end_time, tw[2])
        time_intvl = (end_time - start_time) / time_slot
        for tw in format_tweets:
            slot = get_slot(time_intvl, tw[2], start_time) - 1
            if tw[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] -= 1

        # format final timeline data -- label every 4th slot with a time
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time + i * time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])

        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(
            request, {
                'format_tweets': format_tweets,
                'len': len(format_tweets),
                'neg': negative,
                'pos': len(format_tweets) - negative,
                'keyword': keyword,
                'timeline': time_timeline,
                'form': form,
            })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
        })
        return HttpResponse(template.render(context))
# Fragment: re-issues the previous search URL with '&exclude=retweets' and
# hands each tweet's fields to the module-level append() helper;
# my_callback_closure sleeps 60s after every 5th query to respect rate
# limits.
# NOTE(review): the `try:` matching the trailing `except` lies outside this
# chunk, so the code is kept byte-identical here.
querystr = tso.create_search_url() tso2 = TwitterSearchOrder() tso2.set_search_url(querystr + '&exclude=retweets') ts = TwitterSearch( consumer_key='tjYkJwTKlpK40ZblE2XIqbI8a', consumer_secret='njaZM1pzG27wa9OXjd4DheeY3WARFNxy5UtHU1EzRdK0Tind6e', access_token='907999680503713792-Yz1pnFxt1hWSMzNGnjMS7vFARUvFEWg', access_token_secret='x2m3oZEkkR2dflLGUmXFtxGwj8r2mQvd6kBUizetmuNbI') def my_callback_closure(current_ts_instance): queries, tweets_seen = current_ts_instance.get_statistics() if queries > 0 and (queries % 5) == 0: time.sleep(60) for tweet in ts.search_tweets_iterable(tso2, callback=my_callback_closure): tagsraw = tweet.get('entities', {'hashtags': []}).get('hashtags', []) tags = [] for tag in tagsraw: tags.append(tag.get('text')) append(file_path, tweet['user']['screen_name'], tweet['text'].replace('\n', '').replace('\r', ''), tweet['created_at'], tweet['retweet_count'], tweet['favorite_count'], tweet['user']['followers_count'], tags) except TwitterSearchException as e: print(e)
# Fragment: the leading `c1 -= 1 return replies, since` is the tail of a
# Reply() helper whose definition lies outside this chunk.  The module-level
# code that follows asks for a party's Twitter handle, walks its timeline
# with TwitterUserOrder and prints the reply tree per tweet (characters
# outside the Basic Multilingual Plane are mapped to U+FFFD for console
# output).  Kept byte-identical because the enclosing definition is cut off.
c1 -= 1 return replies, since api = tweepy.API(auth, wait_on_rate_limit=True) print( "Enter the Screen Name of the political party whose tweets reply tree you want to see(eg. @BJP4India,@INCIndia,@AamAadmiParty)" ) name = input() #name = '@BJP4India' reply_arr = [] non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd) ts = TwitterSearch(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET, access_token=ACCESS_KEY, access_token_secret=ACCESS_SECRET) tuo = TwitterUserOrder(name) # create a TwitterUserOrder count = 0 for full_tweets in ts.search_tweets_iterable(tuo): count += 1 c1 = 0 reply_arr, id_tweet = Reply(full_tweets, c1) #print(tweet.text) print("Tweet :", full_tweets['text'].translate(non_bmp_map)) print("No. of replies is ..", len(reply_arr[id_tweet])) for elements in reply_arr[id_tweet]: print("Replies :", elements) reply_arr.clear()
# Fragment: builds an OR-combined keyword search and writes every tweet as a
# '###'-delimited record into a per-day output file.
# NOTE(review): the `try:` matching the trailing `except` lies outside this
# chunk, so the code is kept byte-identical here.
tso.set_keywords( keywords, or_operator=True ) # let's define all words we would like to have a look for #tso.set_language('de') # we want to see German tweets only tso.set_include_entities( False) # and don't give us all those entity information # it's about time to create a TwitterSearch object with our secret tokens ts = TwitterSearch( consumer_key='tJKFqoExyXrBlvXRW0Cpk9iUp', consumer_secret='YoEnqduMUAeRlBPK13sWgniP5eu0O8xHoXlTuw0Ht60ocpNTRB', access_token='341576524-jDpQev9B54G4DpEHAOfkDNSbpUzoU2wKpbaS1Tov', access_token_secret='QQ7glr2qYKWAd5kzMeM04rdrUFmDyagCHkaMziNH9diKW') # this is where the fun actually starts :) with open( '/home/jishnu/Documents/ISB/Term3/practicum/workspace/data_collection/twitter/data/tweets_{0}' .format(datetime.datetime.today().strftime('%Y%m%d')), 'w') as fileout: #for tweet in ts.search_tweets_iterable(tso): # fileout.write('@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) ) for idx, tweet in enumerate(ts.search_tweets_iterable(tso)): fileout.write( '###TWEET_NO:%d###!###USER:%s###!###DATE_TIME:%s###!###LOCATION:%s###!###TWEET:%s' % (idx, tweet['user']['screen_name'], tweet['created_at'], tweet['user']['location'], tweet['text'])) fileout.write('\n') except TwitterSearchException as e: # take care of all those ugly errors if there are some print(e)
def tweet_collect(hashstring, in_country, disaster):
    """Collect English tweets for #hashstring and enrich each with user
    metadata, spaCy NER locations, TextBlob sentiment, and fuzzy-match
    scores against the given disaster location.

    Raw tweets are also appended to the module-level twitter_raw_filename.
    Returns a list of per-tweet dicts, or None when the Twitter search
    raises TwitterSearchException.
    """
    tweets_list = []
    try:
        search_string = "#" + hashstring
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_language('en')
        tso.add_keyword(search_string)
        ts = TwitterSearch(
            consumer_key='LheTHAR7DSfhkIqYiBdlA',
            consumer_secret='F8uj4jsQv7THfjs1fUf7iLDTlgQUcneJIEhEXgO6A',
            access_token='1282607706-yAOJ6ZQ8zLJrTPK1LxzEJ4yfgU24EwHDK64LFAu',
            access_token_secret='UfpSAvmUyio0ydV2mn5kBz3fP7A6c5JHmlNyGizFVvYtG',
            verify=False
        )
        for tweet in ts.search_tweets_iterable(tso):
            # archive the raw tweet -- the `with` block closes the handle;
            # the original's explicit f.close() inside it was redundant
            with open(twitter_raw_filename, 'a', encoding='utf-8') as f:
                json.dump(tweet, f, ensure_ascii=False)

            tweet_dict = {}
            tweet_dict["tweet_user_name"] = tweet['user']['name']
            tweet_dict["tweet_user_screen"] = tweet['user']['screen_name']
            tweet_dict["follower_count"] = tweet['user']['followers_count']
            tweet_dict["user_created"] = tweet['user']['created_at']
            tweet_dict["tweet_count"] = tweet['user']['statuses_count']
            tweet_dict["following_status"] = tweet['user']['following']
            tweet_dict["friends_count"] = tweet['user']['friends_count']
            tweet_dict["favourites_count"] = tweet['user']['favourites_count']
            tweet_dict["description"] = tweet['user']['description']
            tweet_dict["user_verified"] = tweet['user']['verified']
            tweet_dict["user_time_zone"] = tweet['user']['time_zone']
            tweet_dict["user_location"] = tweet['user']['location']
            tweet_dict["user_tweet"] = tweet['text']
            hash_tag_list = re.findall(r"#(\w+)", tweet['text'])
            hash_tag = ','.join(hash_tag_list)
            # NOTE(review): this overwrites the user's created_at stored
            # above with the tweet's created_at -- present in the original;
            # confirm which one downstream consumers expect.
            tweet_dict["user_created"] = tweet['created_at']
            tweet_dict["hash_tag"] = hash_tag
            # strip HTML tags from the tweet source string
            clean_the_tag = re.compile('<.*?>')
            cleantext = re.sub(clean_the_tag, '', tweet['source'])
            tweet_dict["tweet_source"] = cleantext

            # spaCy NER: harvest geopolitical / location entities
            doc = nlp(tweet['text'])
            location_from_tweets_list = []
            for token in doc.ents:
                # BUGFIX: the original condition was
                # `A == "GPE" or A == "LOC" and A != ""`; with Python's
                # precedence that is exactly membership in {"GPE", "LOC"}.
                if token.label_ in ("GPE", "LOC"):
                    print(token.text, token.label_)
                    location_from_tweets_list.append(token.text)
            location_from_tweets = ",".join(location_from_tweets_list)
            tweet_dict["location_from_tweets"] = location_from_tweets

            tweet_sentiment = TextBlob(tweet['text'])
            tweet_dict["tweet_sentiment_polarity"] = tweet_sentiment.polarity
            tweet_dict["tweet_sentiment_subjectivity"] = tweet_sentiment.subjectivity
            (tweet_dict["match_ratio"],
             tweet_dict["country_partial_match_ratio"],
             tweet_dict["token_sort_match_ratio"],
             tweet_dict["token_set_match_ratio"],
             ) = match_score(tweet['text'], in_country + " " + disaster)
            (tweet_dict["location_from_tweets_match_ratio"],
             tweet_dict["location_from_tweets_country_partial_match_ratio"],
             tweet_dict["location_from_tweets_token_sort_match_ratio"],
             tweet_dict["location_from_tweets_token_set_match_ratio"],
             ) = match_score(location_from_tweets, in_country)
            tweet_dict["input_disaster_location"] = in_country
            tweet_dict["input_disaster"] = disaster
            print(tweet_dict)
            tweets_list.append(tweet_dict)
        # BUGFIX(review): return placed after the loop so ALL tweets are
        # collected -- the mangled original appeared to return inside it.
        return tweets_list
    except TwitterSearchException as e:
        print(e)
def _do_lookup(potential_user, body):
    """Pull `potential_user`'s recent (non-RT) tweets and compare each one
    word-by-word against `body` (the OCR'd word list), reporting any tweet
    whose word-overlap confidence exceeds the thresholds.

    potential_user: handle including the leading '@' (may be None on OCR
    failure); body: list of words OCR'd from the screenshot (may be None).
    Prints results via termcolor; returns nothing.
    """
    config = get_config()
    limit_of_tweets = int(args.limit)
    # Just in case the person using the program puts in ' or " in the config.
    consumer_key = config.get(
        'twitter', 'consumer_key').replace(
        '\'', '').replace(
        '\"', '')
    consumer_secret = config.get(
        'twitter', 'consumer_secret').replace(
        '\'', '').replace(
        '\"', '')
    access_token = config.get(
        'twitter', 'access_token').replace(
        '\'', '').replace(
        '\"', '')
    access_token_secret = config.get(
        'twitter', 'access_token_secret').replace(
        '\'', '').replace(
        '\"', '')
    if potential_user is None:
        print(colored("[*] It looks like OCR failed. Please make sure you " +
                      "crop the image as in sample and is readable.", 'red'))
        exit(1)
    try:
        tuo = TwitterUserOrder(potential_user[1:])
        ts = TwitterSearch(
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            access_token=access_token,
            access_token_secret=access_token_secret
        )
        tweets = []
        for tweet in ts.search_tweets_iterable(tuo):
            # Nobody cares about re-tweets
            if 'RT ' not in tweet['text']:
                entry = (tweet['text'], tweet['id'])
                # BUGFIX: the original tested the raw tweet dict against the
                # list of (text, id) tuples, so the dedupe never matched.
                if entry not in tweets:
                    tweets.append(entry)
                if not limit_of_tweets:
                    break
                else:
                    limit_of_tweets -= 1
        # If none of that was found, let's report an OCR error
        if body is None:
            print(colored("[*] It looks like OCR failed.Please make sure you " +
                          "crop image as in sample and is readable.", 'red'))
        found_tweet = False
        # Check against every tweet pulled
        for tweet in tweets:
            removed_elements = 0
            ltweet, orig_len = tweet[0].split(' '), len(tweet[0].split(' '))
            # Compare each element of body to element in body. TODO: Optimize
            for ele in body:
                if ele in ltweet:
                    removed_elements += 1
                    ltweet.remove(ele)
            removal_rate = (removed_elements / float(orig_len)) * 100
            if int(removal_rate) > 75:
                found_tweet = True
                print(colored("[*] It looks like this is a valid tweet",
                              'green'))
                print(colored("-> Confidence : " + "%.2f" % removal_rate +
                              "%", 'green'))
                print(colored("-> Potential URL : https://twitter.com/" +
                              potential_user[1:] + "/status/" +
                              str(tweet[1]), 'green'))
            # BUGFIX: the original `in (55, 75)` only matched a rate of
            # exactly 55 or 75; the intended check is the 55-75 band.
            elif 55 <= int(removal_rate) <= 75:
                found_tweet = True
                print(colored("[*] This might be a valid tweet", 'yellow'))
                print(colored("-> Confidence : " + "%.2f" % removal_rate +
                              "%", 'yellow'))
                print(colored("-> Potential URL : https://twitter.com/" +
                              potential_user[1:] + "/status/" +
                              str(tweet[1]), 'yellow'))
        if not found_tweet:
            print(colored("[*] I couldn't find a tweet like that. " +
                          "Try increasing the limit to pull more tweets",
                          'yellow'))
    except TwitterSearchException as e:
        # catch all those ugly errors
        print(e)
import numpy as np
import string
from TwitterSearch import *  # library used for the Twitter API

try:
    # account whose timeline we want to inspect
    tuo = TwitterUserOrder('dunfrey')
    # Twitter API credentials
    ts = TwitterSearch(
        consumer_key='IMJh4kjQLGDzUaT9t1v0RXm5Y',
        consumer_secret='cjt9d684CpvElXof1BxUMgSakNnFBVLDweQTSpGZolzzrnU8JE',
        access_token='968521944944529408-oI5NcJVaZellwrsPjhsQkQPDeAZJzKf',
        access_token_secret='hc7bTI65fG97smD3ZEB6iCjLrBzHBxn2Sp6TIaX8fZSJZ')
    # accumulates the text of every tweet fetched
    s = ""
    for tweet in ts.search_tweets_iterable(tuo):  # one tweet at a time
        texto = ('@%s tweeted: %s' %
                 (tweet['user']['screen_name'], tweet['text']))
        print(texto)  # print the tweet
        s += str(texto)  # collect everything into one string
except TwitterSearchException as e:
    print(e)  # exception handling

# lowercase the collected text so it can be normalised below
text = s.lower()
# BUGFIX: the original read `text = text = text.translate(...)` -- a
# redundant double assignment.
text = text.translate({ord(k): None for k in string.punctuation})
words = text.split()
wordset = set(words)
# Script skeleton: searches English tweets and prints author + text for each.
# NOTE(review): TSearch.set_keywords() is called without the required keyword
# list (see the author's own '==== need the list of keywords' note), and the
# `try:` has no visible except clause -- it presumably continues beyond this
# chunk.  Kept byte-identical.
'''complexity: functions to analyse data see if anyone tweets back''' from TwitterSearch import * try: TSearch = TwitterSearchOrder() #creating an object of the attributes of the searching guidelines TSearch.set_keywords() #==== need the list of keywords (attribute of an object) TSearch.set_language('en') #set language of tweets to english TSearch.set_include_entities(True) #this is the info from the tweets ==== look up what info is involved #token used to connect to my twitter account T = TwitterSearch( consumer_key = '', consumer_secret = '', access_token = '', access_token_secret = '') for tweet in T.search_tweets_iterable(TSearch): print(tweet['user']['screen_name'] + tweet['text']) #this is where the functions will be called to scrape data, maybe to tweet back ? (another api?) ==== research necessary APIs GitHub
# "to": "uri:twitter/user/{{ in_reply_to_user_id_str }}", # "author": "uri:twitter/user/{{ user['id_str'] }}", # "text": "{{ text }}", # "parent": "uri:twitter/tweet/{{ in_reply_to_user_id_str }}", # ## "mentions" is an array of the caliper IDs from the user_mentions objects array # "mentions": ["uri:twitter/user/{{ entities[user_mentions]['id_str'] }}", "..." ], # ## "hashtags" is an array of the hashtag texts included in the tweet entities # "hashtags": ["{{ entities[hashtags][text] }}", " "] # } # } # Open the variables db_inserts = 0 caliper_tweet_object = {} twitter_id_list = [] twitter_search = ts.search_tweets_iterable(tso) tweet_sentiment = '' # this is where the fun actually starts :) for tweet in twitter_search: mentions_list = [] hashtags_list = [] tweet_id = "" # Create the caliper_tweet object caliper_tweet = { "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent", "type": "MessagingEvent", "startedAtTime": "", ## Can be used to query Twitter API for user information "actor": "", "verb": "tweetSent",