def run_newsflash(existing_tweet_corpus, lang, bounding_box, ngrams, update_interval):
    '''
    Train a Newsflash model from an existing tweet corpus, then attach to
    the Twitter streaming API and keep feeding it live tweets.

    The bounding box is picked from Google Maps corner points, rounded out.
    For Manhattan: SW corner 40.63,-74.12 / NE corner 40.94,-73.68, i.e.
    bounding_box = "-74.12,40.63,-73.68,40.94".
    '''
    global nf_obj
    global is_trained
    stream_url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    filter_params = '?language=%s&locations=%s' % (lang, bounding_box)
    # train on the offline corpus and compute the initial rankings
    nf_obj = nf.train_nf(existing_tweet_corpus, clients, ngrams)
    nf.compute_rankings(nf_obj)
    is_trained = True
    # tell every connected client that training finished
    for client in clients:
        client.write_message(json.dumps({'type': 'status', 'status': True}))
    # push preliminary (pre-stream) rankings
    stream_stats(nf_obj)
    live_source = twitterreq(stream_url + filter_params, 'GET', [])
    retrieve_tweets_with_newsflash(nf_obj, live_source, update_interval)
def run_newsflash(existing_tweet_corpus, lang, bounding_box, ngrams, update_interval):
    '''
    Bootstrap Newsflash from a tweet file, then stream the Twitter API.

    The bounding box was chosen by picking points on Google Maps and
    rounding them out.  For Manhattan: SW corner 40.63,-74.12 and
    NE corner 40.94,-73.68, so pass "-74.12,40.63,-73.68,40.94".
    '''
    global nf_obj
    global is_trained
    endpoint = 'https://stream.twitter.com/1.1/statuses/filter.json'
    query = '?language=%s&locations=%s' % (lang, bounding_box)
    nf_obj = nf.train_nf(existing_tweet_corpus, clients, ngrams)
    nf.compute_rankings(nf_obj)
    is_trained = True
    # build the "trained" notification once and broadcast it
    status_msg = json.dumps({'type': 'status', 'status': True})
    for listener in clients:
        listener.write_message(status_msg)
    stream_stats(nf_obj)  # push preliminary (pre-stream) rankings
    source = twitterreq(endpoint + query, 'GET', [])
    retrieve_tweets_with_newsflash(nf_obj, source, update_interval)
def retrieve_tweets_with_newsflash(nf_obj, source, update_interval): thread = threading.current_thread() count = 0 print 'Streaming live Twitter data' for tweet in source: if thread.stop: threads.remove(thread) break # NOTE THAT THIS IGNORES THE ORIGINAL TWEET # IN THE CASE OF RETWEETS. this is okay, for now. t = parse_streaming_tweet(tweet) if t is not None: t = t[0] count += 1 sys.stdout.write(' Parsing tweet %d \r' % (count)) sys.stdout.flush() tweet_json = json.dumps({'type' : 'tweet', 'tweet' : {'latitude' : t[5], 'longitude' : t[6], 'tid': t[0], 'text' : t[7]}}) # now add it to the Newsflash object nf.parse_tweet(nf_obj, t) for client in clients: client.write_message(tweet_json) if count == update_interval: sys.stdout.write('Recomputing rankings\n') count = 0 nf.compute_rankings(nf_obj) stream_stats(nf_obj) sys.stdout.write('\n')
def retrieve_tweets_with_newsflash(nf_obj, source, update_interval): thread = threading.current_thread() count = 0 print 'Streaming live Twitter data' for tweet in source: if thread.stop: threads.remove(thread) break # NOTE THAT THIS IGNORES THE ORIGINAL TWEET # IN THE CASE OF RETWEETS. this is okay, for now. t = parse_streaming_tweet(tweet) if t is not None: t = t[0] count += 1 sys.stdout.write(' Parsing tweet %d \r' % (count)) sys.stdout.flush() tweet_json = json.dumps({ 'type': 'tweet', 'tweet': { 'latitude': t[5], 'longitude': t[6], 'tid': t[0], 'text': t[7] } }) # now add it to the Newsflash object nf.parse_tweet(nf_obj, t) for client in clients: client.write_message(tweet_json) if count == update_interval: sys.stdout.write('Recomputing rankings\n') count = 0 nf.compute_rankings(nf_obj) stream_stats(nf_obj) sys.stdout.write('\n')
def run_newsflash(nf_pickle_file): ''' modified version of fetch_from_manhattan; instead of writing to a csv file it puts the tweets into the newsflash object and calculates rankings ''' nf = pickle.load(file(nf_pickle_file)) print 'Newsflash pickle object successfully loaded' url = 'https://stream.twitter.com/1.1/statuses/filter.json' add = '?language=en&locations=-74.12,40.63,-73.68,40.94' response = twitterreq((url + add), 'GET', []) print 'API call made' update = 0 for line in response: tweets_info = parse_streaming_tweet(line) if tweets_info is not None: update += 1 sys.stdout.write(' Parsing tweet %d \r' % (update)) sys.stdout.flush() nf.last_tweet = parse_tweet(nf, tweets_info[0]) if tweets_info[1] is not None: # if it's a retweet, add the original tweet, but DON'T # upddate "last tweet" bc it's obv gonna be older parse_tweet(nf, tokenizer, tweets_info[1]) # update every 50 tweets if update == 50: sys.stdout.write('Recomputing rankings\n') update = 0 rankings = compute_rankings(nf, True) for term in rankings[:20]: rank = nf.ranks[term] print '%s (%d, %f)\t%f' % (term, rank.freq, rank.dfreq, rank.box_size) sys.stdout.write('\n')
def run_newsflash(nf_pickle_file): ''' modified version of fetch_from_manhattan; instead of writing to a csv file it puts the tweets into the newsflash object and calculates rankings ''' nf = pickle.load(file(nf_pickle_file)) print 'Newsflash pickle object successfully loaded' url = 'https://stream.twitter.com/1.1/statuses/filter.json' add = '?language=en&locations=-74.12,40.63,-73.68,40.94' response = twitterreq((url+add), 'GET', []) print 'API call made' update = 0 for line in response: tweets_info = parse_streaming_tweet(line) if tweets_info is not None: update += 1 sys.stdout.write(' Parsing tweet %d \r' % (update)) sys.stdout.flush() nf.last_tweet = parse_tweet(nf, tweets_info[0]) if tweets_info[1] is not None: # if it's a retweet, add the original tweet, but DON'T # upddate "last tweet" bc it's obv gonna be older parse_tweet(nf, tokenizer, tweets_info[1]) # update every 50 tweets if update == 50: sys.stdout.write('Recomputing rankings\n') update = 0 rankings = compute_rankings(nf, True) for term in rankings[:20]: rank = nf.ranks[term] print '%s (%d, %f)\t%f' % (term, rank.freq, rank.dfreq, rank.box_size) sys.stdout.write('\n')