def search(kw):
    begin = int(request.args['begin'])
    end = int(request.args['end'])
    if begin == 0:
        try:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            tso.set_language('en')
            tso.set_keywords([kw])  # define the words we want to search for
            tso.set_include_entities(False)  # skip the entity information

            # create a TwitterSearch object with our secret tokens
            ts = TwitterSearch(
                consumer_key=Tconsumer_key,
                consumer_secret=Tconsumer_secret,
                access_token=Taccess_token,
                access_token_secret=Taccess_token_secret
            )

            # this is where the fun actually starts :)
            ts.search_tweets(tso)

            oldTweets = Tweets.get_one(keyword=kw)
            if oldTweets is None:
                tweets = Tweets(keyword=kw, tw=ts.get_tweets()['statuses'])
                tweets.save()
            else:
                oldTweets.tw = ts.get_tweets()['statuses']
                oldTweets.save()
        except TwitterSearchException as e:
            # take care of all those ugly errors if there are some
            print(e)
            return jsonify(error=1)
    tweets = Tweets.get_one(keyword=kw).tw
    return jsonify(tweets=tweets[begin:end])
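All of these snippets rely on the third-party TwitterSearch package. A minimal sketch of the imports the snippet above assumes (`Tweets` and the `Tconsumer_*` / `Taccess_*` credentials are application-specific names that are not part of this listing):

# Imports this snippet assumes (a sketch; Tweets and the credential
# variables are application-specific and not shown here).
from flask import request, jsonify
from TwitterSearch import TwitterSearch, TwitterSearchOrder, TwitterSearchException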
def get_data(self, config):
    count = int(config.get('count', 50))

    # Configure the search query
    tso = TwitterSearchOrder()
    tso.set_keywords([config['query']])
    tso.set_include_entities(False)

    # Configure the search
    ts = TwitterSearch(consumer_key=config['consumer_key'],
                       consumer_secret=config['consumer_secret'],
                       access_token=config['access_token'],
                       access_token_secret=config['access_secret'])

    data = dict()
    data['query'] = config['query']
    data['tweets'] = list()

    # Do the search
    result = ts.search_tweets(tso)['content']['statuses']

    # Filter results into a more usable format
    for tweet in result[:count]:
        filtered_tweet = dict()
        filtered_tweet['username'] = '******' + tweet['user']['screen_name']
        filtered_tweet['tweet'] = tweet['text']
        data['tweets'].append(filtered_tweet)

    return data
def search(search):
    twitter_utils = twitter.Twitter()
    credentials = session.get('credentials')
    search = [search]
    try:
        tso = TwitterSearchOrder()
        tso.set_language('en')
        tso.set_keywords(search)
        tso.set_include_entities(False)  # remove later if you want to use images
        query = TwitterSearch(consumer_key=session['consumer_key'],
                              consumer_secret=session['consumer_secret'],
                              access_token=session['token'],
                              access_token_secret=session['token_secret'])
        response = query.search_tweets(tso)
        t_range = datetime.now(pytz.utc) - timedelta(minutes=TIME_RANGE)
        tweets = [
            t for t in response['content']['statuses']
            if twitter_utils.get_date(t) >= t_range
        ]
        print("Current rate-limiting status: " +
              str(query.get_metadata()['x-rate-limit-reset']))
        return render_template(
            "page.html",
            search=search,
            tweets=tweets,
            music_config=twitter_utils.get_music_config(tweets))
    except TwitterSearchException as e:
        return str(e)
def userSearch(user):
    ts = TwitterSearch(
        consumer_key='UIBl6otwQD9CtbhRQSQ2GlV8H',
        consumer_secret='MlxVNNZDWfEDBpOTbZwOAPQ8BziP3tcQwMoU3vXdxllzsdgjLu',
        access_token='85289745-4PknFj4zSUPd12rbIg8ZkPnAAewZCEmwXj3wyNbiO',
        access_token_secret='A0RNhwgoVh0okZQoL5w6UydpplyTSft1Sx6QCZ4TtvaAC')
    tuo = TwitterUserOrder(user)  # create a TwitterUserOrder
    # start asking Twitter about the timeline
    return ts.search_tweets(tuo)
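userSearch() returns only the raw first page of the timeline. If the whole timeline is wanted, the library's iterable interface pages through it; a minimal sketch, reusing a TwitterSearch client configured as above (the function name is ours):

# A sketch (function name is ours): page through the whole timeline with
# search_tweets_iterable() instead of returning only the raw first page.
from TwitterSearch import TwitterUserOrder, TwitterSearchException

def user_timeline_texts(ts, user):
    tuo = TwitterUserOrder(user)
    try:
        return [tweet['text'] for tweet in ts.search_tweets_iterable(tuo)]
    except TwitterSearchException as e:
        print(e)
        return []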
def search_and_archive():
    todo = True
    results = []
    next_max_id = 0
    since_id = get_since_id()

    tso = TwitterSearchOrder()
    tso.add_keyword(TW_SEARCH_KEYWORD)
    tso.set_result_type('recent')
    if since_id > 0:
        tso.set_since_id(since_id)

    ts = TwitterSearch(consumer_key=TW_CONSUMER_KEY,
                       consumer_secret=TW_CONSUMER_SECRET,
                       access_token=TW_ACCESS_TOKEN,
                       access_token_secret=TW_ACCESS_SECRET)

    while todo:
        print('Current MaxID is ' + str(next_max_id))
        response = ts.search_tweets(tso)
        todo = not len(response['content']['statuses']) == 0

        for tweet in response['content']['statuses']:
            result = {}
            for key in mp:
                result[key] = str(nest_get(mp[key], tweet))
            result.update({
                'time': format_timestamp(result['created_at']),
                'created_at': format_timestamp(result['created_at']),
                'rt_created_at': format_timestamp(result['rt_created_at']),
                'qs_created_at': format_timestamp(result['qs_created_at']),
                'primary_url': pick_primary_url(result['entities_urls']),
                'rt_primary_url': pick_primary_url(result['rt_entities_urls']),
                'qs_primary_url': pick_primary_url(result['qs_entities_urls']),
            })
            results.append(result)

            if (tweet['id'] < next_max_id) or (next_max_id == 0):
                next_max_id = tweet['id']
                next_max_id -= 1

        records = len(results)
        if records > 450 or todo is not True:
            bulk_load(results)
            results = []
            print('processed ' + str(records) + ' records.')

        tso.set_max_id(next_max_id)
        sleep(2)
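search_and_archive() leans on a module-level `mp` mapping and a `nest_get()` helper that are not part of this listing. A plausible sketch, assuming `mp` maps output keys to dotted paths into the tweet JSON:

# Hypothetical sketch of the nest_get() helper assumed above; `mp` would
# map output keys to dotted paths, e.g.
#   mp = {'created_at': 'created_at',
#         'rt_created_at': 'retweeted_status.created_at'}
def nest_get(path, tweet):
    value = tweet
    for part in path.split('.'):
        if not isinstance(value, dict) or part not in value:
            return None  # path absent, e.g. the tweet is not a retweet
        value = value[part]
    return value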
def searchTweet(mySearch, lang, isSaved, filePath, isSentToKafka, kafkaProd,
                topic):
    try:
        now = str(datetime.datetime.now()).replace(" ", "T")
        if isSaved:
            fd = open(filePath, 'w')

        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords(mySearch)  # define the words we want to search for
        tso.set_language(lang)  # restrict results to the requested language
        tso.set_include_entities(False)  # skip the entity information

        # create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(consumer_key=consumerKey,
                           consumer_secret=consumerSecret,
                           access_token=accessTokenKey,
                           access_token_secret=accessTokenSecret)

        # this is where the fun actually starts :)
        for tweet in ts.search_tweets_iterable(tso):
            data = {
                "id": tweet["id"],
                "user": tweet["user"]["screen_name"],
                "sentence": tweet["text"],
                "country": getCountry(tweet["place"]),
                "lang": tweet["lang"],
                "retweetCount": tweet["retweet_count"],
                "isRetweet": tweet["retweeted"],
                "keySearch": mySearch[0],
                "eventDate": now
            }
            if isSaved:
                # fd.write(json.dumps(data).encode("utf-8") + "\n")
                # fd.flush()
                pass
            if isSentToKafka:
                kafkaProd.send(topic, json.dumps(data).encode("utf-8"))

        # issue the query once more and return the raw response
        return ts.search_tweets(tso)
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        print(e)
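searchTweet() calls a getCountry() helper that is not shown. In the v1.1 tweet payload, `place` is null unless the tweet is geo-tagged, and a present place object carries a `country` field, so a minimal sketch could look like this (the helper body is an assumption):

# Hypothetical sketch of the getCountry() helper used above: "place" is
# None unless the tweet is geo-tagged; a present place dict has "country".
def getCountry(place):
    if place is None:
        return None
    return place.get("country")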
def hashtagSearch(hashtag):
    # create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key='UIBl6otwQD9CtbhRQSQ2GlV8H',
        consumer_secret='MlxVNNZDWfEDBpOTbZwOAPQ8BziP3tcQwMoU3vXdxllzsdgjLu',
        access_token='85289745-4PknFj4zSUPd12rbIg8ZkPnAAewZCEmwXj3wyNbiO',
        access_token_secret='A0RNhwgoVh0okZQoL5w6UydpplyTSft1Sx6QCZ4TtvaAC')
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords([hashtag])  # define the words we want to search for
        tso.set_include_entities(False)  # skip the entity information
        # this is where the fun actually starts :)
        return ts.search_tweets(tso)
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        print(e)
# this is where the fun actually starts :)
# for tweet in ts.search_tweets_iterable(tso):
#     print('%s' % (tweet['text']))
# except TwitterSearchException as e:
#     # take care of all those ugly errors if there are some
#     print(e)

todo = True
next_max_id = 0
i = 0
f = open('tweet.txt', 'a')

# let's start the action
while todo:
    # first query the Twitter API
    response = ts.search_tweets(tso)

    # print rate limiting status
    # print("Current rate-limiting status: %s" % ts.get_metadata()['x-rate-limit-reset'])

    # check if there are statuses returned and whether we still have work to do
    todo = not len(response['content']['statuses']) == 0

    # check all tweets according to their ID
    for tweet in response['content']['statuses']:
        tweet_id = tweet['id']
        a = str(tweet['full_text'].encode('utf-8'))
        i = i + 1
        print(i)
        f.write(a + '\n')

        # current ID is lower than current next_max_id?
        if tweet_id < next_max_id or next_max_id == 0:
            next_max_id = tweet_id
            next_max_id -= 1  # decrement to avoid seeing this tweet again

    # set the lowest ID seen as the new max_id for the next page
    tso.set_max_id(next_max_id)

f.close()
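The commented-out metadata line above hints at the rate-limit headers the library exposes through get_metadata(). A small sketch of using them to back off, assuming a configured `ts` (the helper name is ours):

# Sketch (names are ours): sleep until the window resets when the
# rate-limit headers ts.get_metadata() exposes say no calls remain;
# header names are the standard Twitter REST v1.1 ones.
import time

def wait_if_rate_limited(ts):
    meta = ts.get_metadata()
    remaining = int(meta.get('x-rate-limit-remaining', 1))
    if remaining == 0:
        reset_at = int(meta.get('x-rate-limit-reset', 0))
        time.sleep(max(reset_at - time.time(), 0) + 1)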
ts = TwitterSearch(
    consumer_key='F8QnORsYQqXiY2zu7FDmxfRsL',
    consumer_secret='5v7l0GxfUb0ukigxTsvwiGungqWfZjBGv5fSygnXOUGH5JFbph',
    access_token='17522673-AmQBBeDcHwkwJVtkwxhOW8iESiW5xoVlZeLNMOEIp',
    access_token_secret='hRyBq99BDmjXFaCnKTZ3YGVEUaFQz9bP0T7whvV0FFOv7')

onehourearlier = datetime.datetime.now() - datetime.timedelta(hours=4)
# onehourearlier = datetime.datetime.now()
# note: '%l' (hour without leading zero) is a glibc extension to strftime
currTime = onehourearlier.strftime('%l:%M%p').replace(' ', '')
search_word = '#' + currTime

tso = TwitterSearchOrder()  # the original snippet used tso without creating it
tso.set_keywords([search_word])

msg = None
try:
    response = ts.search_tweets(tso)
    tweet = response['content']['statuses'][0]
    msg = '@%s: %s' % (tweet['user']['screen_name'].encode('utf-8'),
                       tweet['text'].encode('utf-8'))
except TwitterSearchException as e:
    print(e)

if msg and currTime.lower() in msg.lower():
    print(msg)
    # Push to Parse
    connection = httplib.HTTPSConnection('api.parse.com', 443)
    connection.connect()
    params = urllib.urlencode({"where": json.dumps({"timestamp": currTime})})
    connection.request('GET', '/1/classes/tweets?%s' % params, '', {
        # standard Parse REST headers; the original snippet was truncated
        # here, and the key values below are placeholders to be supplied
        "X-Parse-Application-Id": PARSE_APP_ID,
        "X-Parse-REST-API-Key": PARSE_REST_KEY
    })
if domainName.find('twitter.com') == -1:
    success = True
return success

try:
    tso = TwitterSearchOrder()
    tso.set_keywords(['cyclist', 'killed'])
    tso.set_language('en')
    tso.set_include_entities(True)
    ts = TwitterSearch(
        consumer_key='FSRqsdZnVc0vQaosOjNE7Gb4z',
        consumer_secret='8Vu8HqCXnVSuLvOZt3RJmlK8HVP9z5zsh7Qk9k4PcZjeNsAHnA',
        access_token='1018148114-G4apdnYp0htOcwYb2CBhnZkI8d619T40g8jUNEM',
        access_token_secret='ZVwZGXmox8mxWYpQrC6A8BhfiFIbT7Q3jlLiXwUE1v1q4')
    mysearchResp = ts.search_tweets(tso)
    contentOnly = mysearchResp['content']['statuses']
    filterResp = [
        x for x in contentOnly
        if len(x['entities']['urls']) > 0
        and contains_nonTwitter_domain(x['entities']['urls'])
    ]
    f = open('output.txt', 'w', encoding='utf8')
    f.write(str(filterResp))
    f.close()
except TwitterSearchException as e:
    print(e)
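The orphaned if/return at the top of this snippet is the tail of the contains_nonTwitter_domain() helper that both this snippet and run_tweet_scrub() below call. A plausible reconstruction, assuming it loops over the tweet's url entities and checks each expanded_url (only the final condition and return appear in the original):

# Plausible reconstruction of contains_nonTwitter_domain(); only the final
# if/return survives in the original fragment, so the loop over the url
# entities is an assumption.
def contains_nonTwitter_domain(urls):
    success = False
    for url in urls:
        domainName = url['expanded_url']
        if domainName.find('twitter.com') == -1:
            success = True
    return success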
def queryTwitter(records, outputFileIndex, totalRunTime, writeToFileTime,
                 sleepTime):
    n = GeoLocator()
    req = 0
    next_max_id = 0
    startTime = time.time()
    lastWriteTime = startTime
    tso = None
    ts = None
    text_file = None
    text_fileE = None

    # while time.time() - startTime < totalRunTime:
    while True:
        try:
            now = time.time()
            print 'Total running time: ' + str(now - startTime) + ' seconds'

            # Check if it is time to write to file
            if now - lastWriteTime > writeToFileTime:
                print 'Writing to CSV ' + str(len(records)) + ' Tweets'
                records, outputFileIndex = saveRecordsToCSV(records, outputFileIndex)
                lastWriteTime = now

            # If first run, or recovering after an exception, create a new
            # Twitter search object
            if tso is None:
                tso = TwitterSearchOrder()
                tso.set_keywords([''])
                # tso.set_language('en')
                tso.set_count(100)
                tso.set_include_entities(False)
                tso.set_geocode(40.69, -73.94, 20, imperial_metric=False)
                # tso.set_until(datetime.date(2014, 3, 24))
                ts = TwitterSearch(consumer_key='FqjFRT1OHl6xyIGoq9uXSA',
                                   consumer_secret='KuhoVREmf7ngwjOse2JOLJOVXNCi2IVEzQZu2B8',
                                   access_token='114454541-xcjy2sbl7Rr4oIaogsaBrlVL5H4CvcdvOSMy3MnR',
                                   access_token_secret='yyBBOJhxgfw9pezZda2hWF94doONSd50y0JoylYjL3rmY',
                                   verify=False)

            # Query the Twitter API
            text_file = open('json_tweets' + str(outputFileIndex) + '.txt', 'a')
            text_fileE = open('error_log.txt', 'a')
            req += 1
            print 'Request # ' + str(req)
            response = ts.search_tweets(tso)

            # check all tweets according to their ID
            for tweet in response['content']['statuses']:
                text_file.write(str(tweet))
                text_file.write('\n')
                tup = ()
                tweet_id = tweet['id']
                tup = tup + (tweet_id, )
                tup = tup + (str(tweet['created_at']), )
                tup = tup + (str(tweet['user']['created_at']), )
                tup = tup + (str(tweet['user']['screen_name']), )
                tup = tup + (str(tweet['user']['location'].encode('ascii', 'ignore')), )
                tup = tup + (str(tweet['user']['geo_enabled']), )
                tup = tup + (str(tweet['place']), )
                tup = tup + (str(tweet['geo']), )
                tup = tup + (str(tweet['text'].encode('ascii', 'ignore')), )
                tup = tup + (str(tweet['retweet_count']), )

                # Save only tweets with geo inside NYC, or without geo at all.
                # tup[7] is str() of the geo dict, so after yaml.load the
                # coordinates key reads back as the literal string u'coordinates'.
                try:
                    geoObj = yaml.load(tup[7])
                    lat = geoObj["u'coordinates'"][0]
                    lon = geoObj["u'coordinates'"][1]
                    if n.isNYC(lat, lon):
                        records.append(tup)
                except:
                    records.append(tup)

                # current ID is lower than current next_max_id?
                if tweet_id < next_max_id or next_max_id == 0:
                    next_max_id = tweet_id
                    next_max_id -= 1  # decrement to avoid seeing this tweet again

            # set lowest ID as MaxID
            tso.set_max_id(next_max_id)
            print 'Number of Tweets in memory: ' + str(len(records))
            print 'Sleeping...'
            time.sleep(sleepTime)

        except TwitterSearchException as e:
            print e
            if len(records) == 0:
                next_max_id = 0
            if text_file is not None and not text_file.closed:
                text_file.close()
            outputFileIndex = getFile_index()
            if text_fileE is not None:
                text_fileE.write(str(e))
                text_fileE.write('\n')
                text_fileE.close()
            print 'sleeping after error...'
            time.sleep(900)
            # Set tso to None to create a new Twitter search object
            tso = None
def run_tweet_scrub(keywords):
    print 'Running scrub for keywords: ' + str(keywords)
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords(keywords)
        tso.set_language('en')
        tso.set_include_entities(True)

        config_parse = ConfigParser.ConfigParser()
        config_parse.read('config.ini')
        ts = TwitterSearch(
            consumer_key=config_parse.get('keys', 'consumer_key'),
            consumer_secret=config_parse.get('keys', 'consumer_secret'),
            access_token=config_parse.get('keys', 'access_token'),
            access_token_secret=config_parse.get('keys', 'access_token_secret'),
        )

        scrub = Scrub(RunDateTime=datetime.datetime.now(),
                      ScrubType='Twitter',
                      ScrubTypeId=1,
                      SearchKeywords=', '.join(keywords))
        scrub.save()

        mysearchResp = ts.search_tweets(tso)
        contentOnly = mysearchResp['content']['statuses']
        filter_resp = [
            x for x in contentOnly
            if len(x['entities']['urls']) > 0
            and contains_nonTwitter_domain(x['entities']['urls'])
        ]

        num_related = 0
        scrub.NumCandidates = len(filter_resp)
        for candidate in filter_resp:
            twit_url = get_expanded_url(candidate['entities']['urls'])
            if IncidentSourceCandidate.select().where(
                    IncidentSourceCandidate.URL == twit_url).count() > 0:
                print('Continued on URL: ' + twit_url)
                continue

            g = Goose()
            article = g.extract(url=twit_url)
            try:
                twit_id = candidate['id']
                source_candidate = IncidentSourceCandidate(
                    URL=twit_url,
                    Domain=article.domain,
                    ArticleText=article.cleaned_text,
                    ArticleTitle=article.title,
                    Scrub=scrub,
                    SearchFeedId=twit_id,
                    SearchFeedURL=get_tweet_url(candidate),
                    SearchFeedText=candidate['text'].encode('utf8'))
                # str.replace() returns a new string, so assign the result;
                # assumed intent is normalizing curly apostrophes (the
                # original replace arguments were garbled to identical chars)
                source_candidate.ArticleTitle = source_candidate.ArticleTitle.replace(u'\u2019', "'")
                source_candidate.ArticleText = source_candidate.ArticleText.replace(u'\u2019', "'")
                source_candidate.SearchFeedJSON = candidate
                if (article.opengraph is not None) and ('site_name' in article.opengraph):
                    source_candidate.Name = article.opengraph['site_name']
                if source_is_related(source_candidate):
                    source_candidate.IsRelated = True
                    num_related += 1
                # likewise, encode() returns a new string
                source_candidate.ArticleTitle = source_candidate.ArticleTitle.encode('ascii', 'ignore')
                source_candidate.ArticleText = source_candidate.ArticleText.encode('ascii', 'ignore')
                source_candidate.save()
                print(source_candidate.SearchFeedText)
            except Exception as e:
                print(twit_url)
                print(type(article.cleaned_text))
                print(e)

        scrub.NumRelatedCandidates = num_related
        scrub.save()
    except TwitterSearchException as e:
        print(e)
    print '\n'
    print '--------------------------------------'