def configureSearch(id_tweet):
    """Build the TwitterSearchOrder used to look up "#AWSNinja" tweets.

    The received id and the search URL are echoed to stdout. The URL is
    printed before the date restriction is applied, so the printed value does
    not reflect the ``since`` setting.

    :param id_tweet: Value that is only printed; it does not influence the
        search order that is built.
    :return: TwitterSearchOrder with the "#AWSNinja" keyword, entities
        enabled, and ``since`` set to today's local date.
    """
    print("ConfigureSearch: " + str(id_tweet))

    # Date part of the current local time; used as the lower bound below.
    today = datetime.datetime.now().date()

    order = TwitterSearchOrder()
    # Restrictions left disabled in this configuration:
    #   order.set_keywords(['from:YodaBotter', 'to:YodaBotter'], or_operator = True)
    #   order.set_language('en')
    order.add_keyword("#AWSNinja")
    order.set_include_entities(True)  # request entities (incl. media)

    print("Search: " + order.create_search_url())

    order.set_since(today)
    return order
def getTweets(politician_id, searchOnlySexistWords):
    """Search Twitter for English tweets mentioning a politician.

    :param politician_id: Primary key passed to ``Politician.objects.get``.
    :param searchOnlySexistWords: When truthy, every word in
        ``CONFIG['SEXISTWORDS']`` is paired with each of the politician's
        names and the result of ``search_tweets_iterable`` is returned.
        When falsy, the names alone are searched and only the statuses of a
        single ``search_tweets`` call are returned.
    :return: The tweets as described above, or ``None`` when a
        ``TwitterSearchException`` was raised (the error is logged).
        Any other exception propagates to the caller.
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        full_name = politician.first_name + " " + politician.last_name
        politician_names = [full_name, politician.username]

        if searchOnlySexistWords:
            searchTerms = [
                word + ' ' + politician_name
                for word in CONFIG['SEXISTWORDS']
                for politician_name in politician_names
            ]
        else:
            # Plain ``else`` keeps this in step with the fetch branch below.
            # The former ``is False`` check left the keyword list empty for
            # falsy values such as None while still taking the ``else`` fetch.
            searchTerms = politician_names

        tso = TwitterSearchOrder()
        tso.set_keywords(searchTerms, or_operator=True)
        tso.set_language("en")
        tso.set_include_entities(False)
        # Re-seed the order from its own URL with tweet_mode=extended appended.
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")

        ts = TwitterSearch(
            consumer_key=CONFIG['CONSUMER_KEY'],
            consumer_secret=CONFIG['CONSUMER_SECRET'],
            access_token=CONFIG['ACCESS_TOKEN'],
            access_token_secret=CONFIG['ACCESS_TOKEN_SECRET'],
        )

        print("**Processing tweets for " + str(full_name) + "**")

        if searchOnlySexistWords:
            return ts.search_tweets_iterable(tso)

        # Single request only; per the original note this caps the result
        # at 100 tweets when not restricted to sexist words.
        return ts.search_tweets(tso)['content']['statuses']
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of %s", e)
        return None
def gettwitter(query):
    """Scrape tweet texts for *query* from the Twitter web search page.

    A search URL is built with TwitterSearchOrder (language and locale
    'en'), printed, and fetched through ``getHtml``. Every ``<p>`` element
    with class ``TweetTextSize`` is reduced to text by stripping markup, and
    anything that looks like ``scheme://...`` is replaced by the literal
    string ``"url"``.

    :param query: Search phrase, passed as the only keyword.
    :return: List of cleaned tweet strings, each terminated by a newline.
        An empty list is returned when the search URL cannot be built.
    """
    try:
        tso = TwitterSearchOrder()
        tso.set_language('en')
        tso.set_locale('en')
        tso.set_keywords([query])
        url = "https://twitter.com/search" + tso.create_search_url()
    except TwitterSearchException as e:
        # Without a URL there is nothing to fetch; previously execution fell
        # through and failed on the unbound name ``url``.
        print(e)
        return []
    print(url)

    html = getHtml(url)
    soup = BeautifulSoup(html)
    twits = soup.find_all("p", class_="TweetTextSize")

    # Compiled once instead of on every loop iteration.
    tag_pattern = re.compile(r'<[^>]+>', re.S)
    # ``A-Z`` (was ``A-z``, which also matched the punctuation between the
    # upper- and lower-case letters in ASCII).
    link_pattern = re.compile(r"([a-zA-Z]+://\S*\s{0,1})")

    twitters = []
    for t in twits:
        plain = tag_pattern.sub('', str(t))
        plain = link_pattern.sub("url", plain)
        twitters.append(plain + "\n")
    return twitters
def getTweets(politician_id):
    """Search Twitter for English tweets pairing abusive words with a politician.

    Each of the hard-coded words is combined with the politician's full name,
    last name and username; the combinations are OR-ed into one search.

    :param politician_id: Primary key passed to ``Politician.objects.get``.
    :return: The result of ``search_tweets_iterable`` for the built order, or
        ``None`` when a ``TwitterSearchException`` was raised (the error is
        logged). Any other exception propagates to the caller.
    """
    def credential(name):
        # Environment variable wins. CONFIG is consulted only when the
        # variable is absent, so a missing CONFIG key no longer raises
        # KeyError while the environment supplies the value.
        return os.environ[name] if name in os.environ else CONFIG[name]

    try:
        politician = Politician.objects.get(id=politician_id)
        full_name = politician.first_name + " " + politician.last_name
        politician_names = [full_name, politician.last_name, politician.username]
        print("Getting Tweets for " + str(full_name))

        tso = TwitterSearchOrder()
        sexistWords = ['bitch', 'skank', 'rape']
        # Distinct loop name: the loop previously reused ``politician`` and
        # overwrote the model instance with a string.
        searchTerms = [
            word + ' ' + politician_name
            for word in sexistWords
            for politician_name in politician_names
        ]
        tso.set_keywords(searchTerms, or_operator=True)
        print(searchTerms)
        tso.set_language("en")
        tso.set_include_entities(False)
        # Re-seed the order from its own URL with tweet_mode=extended appended.
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")

        ts = TwitterSearch(
            consumer_key=credential('CONSUMER_KEY'),
            consumer_secret=credential('CONSUMER_SECRET'),
            access_token=credential('ACCESS_TOKEN'),
            access_token_secret=credential('ACCESS_TOKEN_SECRET'),
        )
        return ts.search_tweets_iterable(tso)
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of %s", e)
        return None

# if __name__ == "__main__":
#     getTweets()
# # f = open('/root/sample_data/p.txt','w') # # for i in range(len(result)): # # t = result[i][1].encode('ascii', 'replace').replace("\n"," ") # # f.write(t+"\n") # f.close() from TwitterSearch import TwitterSearchOrder, TwitterSearchException import urllib2 try: tso = TwitterSearchOrder() tso.set_language('en') tso.set_locale('en') tso.set_keywords(['airline mergers']) url = "https://twitter.com/search"+tso.create_search_url() print url response = urllib2.urlopen('http://www.baidu.com/') html = response.read() print html except TwitterSearchException as e: print(e) tso2 = TwitterSearchOrder() tso2.set_search_url(querystr + '&result_type=mixed&include_entities=true') tso2.set_locale('en') print(tso2.create_search_url()) tso = TwitterSearchOrder()