コード例 #1
0
ファイル: app.py プロジェクト: ahadik/newsflash
def run_newsflash(existing_tweet_corpus, lang, bounding_box, ngrams,
                  update_interval):
    '''
    Train the global Newsflash model on a stored corpus, notify clients,
    then consume the live Twitter filter stream.

    The bounding box was chosen by picking points on Google Maps and
    rounding them out.

    For Manhattan:
    SW corner: 40.63,-74.12
    NE corner: 40.94,-73.68
    so bounding_box should be "-74.12,40.63,-73.68,40.94"
    '''
    global nf_obj
    global is_trained

    # Train on the historical corpus before touching the live stream.
    nf_obj = nf.train_nf(existing_tweet_corpus, clients, ngrams)
    nf.compute_rankings(nf_obj)

    is_trained = True
    status_msg = json.dumps({'type': 'status', 'status': True})
    for client in clients:
        client.write_message(status_msg)

    # Push preliminary (pre-stream) rankings to connected clients.
    stream_stats(nf_obj)

    stream_url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    query = '?language=%s&locations=%s' % (lang, bounding_box)
    source = twitterreq(stream_url + query, 'GET', [])
    retrieve_tweets_with_newsflash(nf_obj, source, update_interval)
コード例 #2
0
ファイル: app.py プロジェクト: ahadik/newsflash
def run_newsflash(existing_tweet_corpus, lang, bounding_box, ngrams, update_interval):
	'''
	Train the global Newsflash object from a tweet corpus, announce
	readiness to every connected client, then stream the filter API.

	The bounding box was picked off Google Maps and rounded out.
	For Manhattan the SW corner is 40.63,-74.12 and the NE corner is
	40.94,-73.68, so bounding_box should be "-74.12,40.63,-73.68,40.94".
	'''
	global nf_obj
	global is_trained

	endpoint = 'https://stream.twitter.com/1.1/statuses/filter.json'
	endpoint += '?language=%s&locations=%s' % (lang, bounding_box)

	nf_obj = nf.train_nf(existing_tweet_corpus, clients, ngrams)
	nf.compute_rankings(nf_obj)
	is_trained = True

	# Tell every connected client that training has finished.
	ready = json.dumps({'type': 'status', 'status': True})
	for listener in clients:
		listener.write_message(ready)

	# Preliminary rankings, computed before any streamed tweet arrives.
	stream_stats(nf_obj)

	retrieve_tweets_with_newsflash(
		nf_obj, twitterreq(endpoint, 'GET', []), update_interval)
コード例 #3
0
ファイル: app.py プロジェクト: ahadik/newsflash
def retrieve_tweets_with_newsflash(nf_obj, source, update_interval):
	'''
	Consume streamed tweets, fold each into nf_obj, broadcast it to all
	connected clients, and recompute rankings every update_interval
	parsed tweets. Stops as soon as this thread's stop flag is set.
	'''
	me = threading.current_thread()
	parsed = 0
	sys.stdout.write('Streaming live Twitter data\n')
	for raw in source:
		if me.stop:
			threads.remove(me)
			break

		# NOTE: for retweets this keeps only the retweet itself and
		# drops the original tweet it wraps. Acceptable for now.
		parsed_info = parse_streaming_tweet(raw)
		if parsed_info is None:
			continue

		tweet = parsed_info[0]
		parsed += 1
		sys.stdout.write(' Parsing tweet %d    \r' % parsed)
		sys.stdout.flush()

		payload = json.dumps({'type' : 'tweet', 
			'tweet' : {'latitude' : tweet[5], 
			'longitude' : tweet[6], 'tid': tweet[0],
			'text' : tweet[7]}})

		# Fold the tweet into the Newsflash model, then fan it out.
		nf.parse_tweet(nf_obj, tweet)
		for listener in clients:
			listener.write_message(payload)

		if parsed == update_interval:
			sys.stdout.write('Recomputing rankings\n')
			parsed = 0
			nf.compute_rankings(nf_obj)
			stream_stats(nf_obj)
			sys.stdout.write('\n')
コード例 #4
0
ファイル: app.py プロジェクト: ahadik/newsflash
def retrieve_tweets_with_newsflash(nf_obj, source, update_interval):
    """Stream tweets into the Newsflash model and notify clients.

    Every parsed tweet is added to nf_obj and broadcast as JSON to all
    connected clients; rankings are recomputed once per update_interval
    parsed tweets. The loop exits when the owning thread's stop flag is
    raised, removing the thread from the global registry.
    """
    worker = threading.current_thread()
    since_update = 0
    sys.stdout.write('Streaming live Twitter data\n')

    for raw_line in source:
        if worker.stop:
            threads.remove(worker)
            break

        # NOTE: retweets are kept as-is; the original tweet they wrap
        # is ignored. That is okay, for now.
        result = parse_streaming_tweet(raw_line)
        if result is not None:
            tw = result[0]
            since_update += 1
            sys.stdout.write(' Parsing tweet %d    \r' % (since_update))
            sys.stdout.flush()

            message = json.dumps({
                'type': 'tweet',
                'tweet': {
                    'latitude': tw[5],
                    'longitude': tw[6],
                    'tid': tw[0],
                    'text': tw[7]
                }
            })

            # Update the model first, then fan the tweet out to clients.
            nf.parse_tweet(nf_obj, tw)
            for viewer in clients:
                viewer.write_message(message)

            if since_update == update_interval:
                sys.stdout.write('Recomputing rankings\n')
                since_update = 0
                nf.compute_rankings(nf_obj)
                stream_stats(nf_obj)
                sys.stdout.write('\n')
コード例 #5
0
ファイル: fetch_tweets.py プロジェクト: ahadik/newsflash
def run_newsflash(nf_pickle_file, update_interval=50):
    '''
    Stream tweets from the Twitter filter API into a pickled Newsflash
    object and periodically recompute term rankings.

    Modified version of fetch_from_manhattan: instead of writing to a
    csv file it feeds tweets into the Newsflash object.

    Parameters:
        nf_pickle_file  -- path to a pickled Newsflash object
        update_interval -- recompute rankings after this many parsed
                           tweets (default 50, the original behavior)
    '''
    # Open in binary mode and release the handle promptly; the old
    # file() builtin leaked the descriptor and was removed in Python 3.
    with open(nf_pickle_file, 'rb') as fh:
        nf = pickle.load(fh)

    sys.stdout.write('Newsflash pickle object successfully loaded\n')

    # Manhattan bounding box: SW 40.63,-74.12 / NE 40.94,-73.68.
    url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    add = '?language=en&locations=-74.12,40.63,-73.68,40.94'
    response = twitterreq((url + add), 'GET', [])

    sys.stdout.write('API call made\n')

    update = 0

    for line in response:
        tweets_info = parse_streaming_tweet(line)
        if tweets_info is not None:
            update += 1
            sys.stdout.write(' Parsing tweet %d    \r' % (update))
            sys.stdout.flush()

            nf.last_tweet = parse_tweet(nf, tweets_info[0])
            if tweets_info[1] is not None:
                # For a retweet, also add the original tweet, but DON'T
                # update last_tweet since the original is older.
                # BUG FIX: the old call passed an undefined `tokenizer`
                # argument (NameError on the first retweet) and did not
                # match the 2-arg parse_tweet call used above.
                parse_tweet(nf, tweets_info[1])

        # Recompute rankings every update_interval parsed tweets.
        if update == update_interval:
            sys.stdout.write('Recomputing rankings\n')
            update = 0
            rankings = compute_rankings(nf, True)
            for term in rankings[:20]:
                rank = nf.ranks[term]
                sys.stdout.write('%s (%d, %f)\t%f\n' % (term, rank.freq,
                                                        rank.dfreq,
                                                        rank.box_size))
            sys.stdout.write('\n')
コード例 #6
0
ファイル: fetch_tweets.py プロジェクト: ahadik/newsflash
def run_newsflash(nf_pickle_file, update_interval=50):
    '''
    Feed live tweets from the Twitter filter stream into a pickled
    Newsflash object, printing the top-ranked terms periodically.

    Modified version of fetch_from_manhattan: instead of writing to a
    csv file it puts the tweets into the Newsflash object and
    calculates rankings.

    Parameters:
        nf_pickle_file  -- path to a pickled Newsflash object
        update_interval -- recompute rankings after this many parsed
                           tweets (default 50, the original behavior)
    '''
    # Binary mode is required for pickle data; the context manager
    # closes the handle that the old file() builtin (removed in
    # Python 3) used to leak.
    with open(nf_pickle_file, 'rb') as fh:
        nf = pickle.load(fh)

    sys.stdout.write('Newsflash pickle object successfully loaded\n')

    # Manhattan bounding box: SW 40.63,-74.12 / NE 40.94,-73.68.
    url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    add = '?language=en&locations=-74.12,40.63,-73.68,40.94'
    response = twitterreq((url + add), 'GET', [])

    sys.stdout.write('API call made\n')

    update = 0

    for line in response:
        tweets_info = parse_streaming_tweet(line)
        if tweets_info is not None:
            update += 1
            sys.stdout.write(' Parsing tweet %d    \r' % (update))
            sys.stdout.flush()

            nf.last_tweet = parse_tweet(nf, tweets_info[0])
            if tweets_info[1] is not None:
                # If it's a retweet, add the original tweet too, but
                # DON'T update last_tweet because it is older.
                # BUG FIX: dropped the undefined `tokenizer` argument
                # that made this call raise NameError and disagree with
                # the 2-arg parse_tweet call above.
                parse_tweet(nf, tweets_info[1])

        # Recompute rankings every update_interval parsed tweets.
        if update == update_interval:
            sys.stdout.write('Recomputing rankings\n')
            update = 0
            rankings = compute_rankings(nf, True)
            for term in rankings[:20]:
                rank = nf.ranks[term]
                sys.stdout.write('%s (%d, %f)\t%f\n' % (term, rank.freq,
                                                        rank.dfreq,
                                                        rank.box_size))
            sys.stdout.write('\n')