def post(self):
    """Search Twitter for the posted query and report its dominant text tag.

    Reads the ``query`` body argument, searches for English tweets matching
    it, tags every tweet text with ``indicoio.batch_text_tags`` and averages
    the per-category scores over all tagged tweets.

    Writes a JSON object with two keys:

    * ``scores``   -- mapping of category name to its mean score
    * ``category`` -- the category with the highest mean score, or ``null``
      when the search produced nothing to tag
    """
    # NOTE(review): the value is handed to set_keywords() unchanged --
    # presumably a list of values for the argument; confirm against the
    # request framework in use.
    query_string = self.request.body_arguments.get('query')

    query = TwitterSearchOrder()
    query.set_keywords(query_string)
    query.set_language('en')
    query.set_include_entities(False)

    results = TwitterClient.search_tweets(query)
    tweets = [tweet['text'] for tweet in results['content']['statuses']]

    # Nothing to tag when the search came back empty, so skip the API call.
    tweet_results = indicoio.batch_text_tags(tweets) if tweets else []

    # float() keeps the division below a true division on Python 2 as well.
    n_tweets = float(len(tweet_results))
    scores = defaultdict(float)
    for tweet in tweet_results:
        for category, score in tweet.items():
            scores[category] += score / n_tweets

    # max() raises ValueError on an empty mapping; report "no category"
    # instead of failing the request when there were no tagged tweets.
    category = max(scores, key=scores.get) if scores else None

    data = {'scores': scores, 'category': category}
    self.write(json.dumps(data))
def main():
    """Render the main page with every feed entry tagged by its title.

    Parses ``feed``, sends all entry titles to ``batch_text_tags`` as one
    batch, stores each result under the matching entry's ``'tags'`` key,
    converts every entry with ``parsed`` and renders ``main.html`` with the
    converted list.
    """
    feed_items = feedparser.parse(feed)['entries']
    headline_tags = batch_text_tags([item.get('title') for item in feed_items])

    # Attach tags to all items first; conversion happens only afterwards.
    for item, tags in zip(feed_items, headline_tags):
        item['tags'] = tags

    # render template with additional jinja2 data
    return render_template(
        'main.html',
        entries=[parsed(item) for item in feed_items],
    )
def post(self):
    """Search Twitter for the posted query and report its dominant text tag.

    Reads the ``query`` body argument, searches for English tweets matching
    it, tags every tweet text with ``indicoio.batch_text_tags`` and averages
    the per-category scores over all tagged tweets.

    Writes a JSON object with two keys:

    * ``scores``   -- mapping of category name to its mean score
    * ``category`` -- the category with the highest mean score, or ``null``
      when the search produced nothing to tag
    """
    # NOTE(review): the value is handed to set_keywords() unchanged --
    # presumably a list of values for the argument; confirm against the
    # request framework in use.
    query_string = self.request.body_arguments.get('query')

    query = TwitterSearchOrder()
    query.set_keywords(query_string)
    query.set_language('en')
    query.set_include_entities(False)

    results = TwitterClient.search_tweets(query)
    tweets = [tweet['text'] for tweet in results['content']['statuses']]

    # Nothing to tag when the search came back empty, so skip the API call.
    tweet_results = indicoio.batch_text_tags(tweets) if tweets else []

    # float() keeps the division below a true division on Python 2 as well.
    n_tweets = float(len(tweet_results))
    scores = defaultdict(float)
    for tweet in tweet_results:
        for category, score in tweet.items():
            scores[category] += score / n_tweets

    # max() raises ValueError on an empty mapping; report "no category"
    # instead of failing the request when there were no tagged tweets.
    category = max(scores, key=scores.get) if scores else None

    data = {'scores': scores, 'category': category}
    self.write(json.dumps(data))
def test_batch_texttags(self):
    """batch_text_tags returns a list when given a one-element batch."""
    test_data = ["On Monday, president Barack Obama will be..."]
    response = batch_text_tags(test_data, api_key=self.api_key)
    # assertIsInstance reports the offending object and expected type on
    # failure, whereas assertTrue(isinstance(...)) only says
    # "False is not true".
    self.assertIsInstance(response, list)
from sklearn.datasets import fetch_20newsgroups
import indicoio

# Script: fetch the 20 newsgroups posts, tag them in one batch with indicoio,
# and dump both the raw posts and their tags under data/.
#
# NOTE(review): `joblib` is used below but is not imported in this excerpt --
# presumably it is imported elsewhere in the file; confirm.

# Single-argument print(...) produces the same output on Python 2 and, unlike
# the print statement, also parses on Python 3.
print("Downloading data set")
posts = fetch_20newsgroups().data
joblib.dump(posts, "data/posts.jl")

print("Tagging %d posts" % len(posts))
post_tags = indicoio.batch_text_tags(posts)
joblib.dump(post_tags, "data/post_tags.jl")