def worker(api, friends):
    """Index each friend's profile and the tweets not yet stored for them.

    For every friend the profile is indexed as a ``user`` document. The
    stored ``tweet`` document with the highest ``id`` for that screen name
    is then looked up, and its ``_id`` is handed to ``social.GetTweets`` as
    ``since_id`` (``None`` when nothing is stored yet). Every tweet returned
    is indexed under its own id. A timestamped line tagged with the current
    thread name is printed before and after the run.

    Args:
        api: Client handle forwarded unchanged to ``social.GetTweets``.
        friends: Iterable of objects exposing ``id`` and ``screen_name``.

    Returns:
        None.
    """
    def _announce(state):
        # Printed as "('<thread name>', '<state>'):<YYYY-mm-dd HH:MM:SS>".
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # current_thread().name replaces the deprecated camelCase aliases
        # currentThread()/getName(); it is available since Python 2.6.
        label = str((threading.current_thread().name, state))
        print(label + ':' + stamp)

    _announce('Launched')

    for friend in friends:
        engine.index(INDEX, 'user', friend.id, social.userToJSON(friend))

        # Ask only for the single stored tweet with the highest id.
        request = {
            "size": 1,
            "sort": [{"id": {"order": "desc"}}],
            "query": {"match": {"user.screen_name": friend.screen_name}},
        }
        hits = engine.search(INDEX, 'tweet', request)["hits"]["hits"]
        since_id = str(hits[0][u'_id']) if hits else None

        for tweet in social.GetTweets(api, friend.screen_name, since_id):
            engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))

    _announce('Finishing')
    return
def worker(api, friends):
    """Index the given friends and fetch the tweets not yet indexed for each.

    Per friend: the profile is indexed as a ``user`` document; the stored
    ``tweet`` document with the highest ``id`` for the friend's screen name
    supplies ``since_id`` (``None`` if there is no such document); the
    tweets returned by ``social.GetTweets`` are then indexed one by one
    under their own ids. The thread name and a timestamp are printed when
    the worker starts and when it finishes.

    Args:
        api: Client handle passed through to ``social.GetTweets``.
        friends: Iterable of objects exposing ``id`` and ``screen_name``.

    Returns:
        None.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    # current_thread().name replaces the deprecated currentThread()/getName()
    # aliases; the printed text keeps its original shape.
    thread_name = threading.current_thread().name

    launched = str((thread_name, 'Launched'))
    print(launched + ':' + datetime.now().strftime(time_format))

    for friend in friends:
        engine.index(INDEX, 'user', friend.id, social.userToJSON(friend))

        # Only the single stored tweet with the highest id is requested.
        request = {
            "size": 1,
            "sort": [{"id": {"order": "desc"}}],
            "query": {"match": {"user.screen_name": friend.screen_name}},
        }
        docs = engine.search(INDEX, 'tweet', request)
        stored = docs["hits"]["hits"]
        if stored:
            since_id = str(stored[0][u'_id'])
        else:
            since_id = None

        tweets = social.GetTweets(api, friend.screen_name, since_id)
        for tweet in tweets:
            engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))

    finishing = str((thread_name, 'Finishing'))
    print(finishing + ':' + datetime.now().strftime(time_format))
    return
def on_data(self, data):
    """Decode one raw stream message and index it as a tweet document.

    The JSON payload is decoded, its ``source`` field is run through the
    module-level ``parser``, the text is classified with
    ``ml.classifiers.classify`` (``sandbox=True``) and the resulting
    document is written with ``engine.index`` under the tweet id.

    Args:
        data: JSON-encoded message as a string.

    Returns:
        True, both after indexing a tweet and after skipping a message that
        carries no ``id``/``text``.
    """
    decoded = json.loads(data)

    # Messages without an id or text cannot be indexed as tweets. Skip them
    # instead of failing with KeyError, returning the same value as the
    # normal path.
    # NOTE(review): presumably these are stream control messages
    # (delete/limit notices) - confirm against the streaming client in use.
    if 'id' not in decoded or 'text' not in decoded:
        return True

    tweet_id = decoded['id']
    tweet = decoded['text']

    parser.feed(decoded['source'])
    source = parser.getData()

    # 'place' may be absent or null; .get() covers both without raising.
    place = decoded.get('place')
    geo = None
    if place is not None and 'bounding_box' in place:
        bounding_box = place['bounding_box']
        if bounding_box:
            # First point of the first coordinate ring of the bounding box.
            geo = bounding_box['coordinates'][0][0]

    user_location = decoded['user']['location']
    sentiment = ml.classifiers.classify(ml_module_id, [tweet], sandbox=True)

    tweet_indexed = {
        "favorite_count": decoded['favorite_count'],
        "retweeted": decoded['retweeted'],
        "retweet_count": decoded['retweet_count'],
        "in_reply_to_user_id": decoded['in_reply_to_user_id'],
        "favorited": decoded['favorited'],
        "lang": decoded['lang'],
        "tag": tag,
        "sentiment": sentiment.result[0][0],
        "source": source,
        "created_at": decoded['created_at'],
        "text": tweet,
        "text_not_analyzed": tweet,
        "place": place,
        "geo": geo,
        "user_location": user_location,
    }

    print(tweet)
    print('')
    engine.index(index_es, type_es, tweet_id, tweet_indexed)
    return True
def on_data(self, data):
    """Turn one raw stream message into an indexed tweet document.

    Steps: decode the JSON payload, extract the client name by feeding the
    ``source`` field to the module-level ``parser``, classify the text via
    ``ml.classifiers.classify`` (``sandbox=True``), then store the assembled
    document with ``engine.index`` using the tweet id as document id.

    Args:
        data: JSON-encoded message as a string.

    Returns:
        True in every non-raising case, including when the message has no
        ``id``/``text`` and is therefore skipped.
    """
    decoded = json.loads(data)

    # Without both an id and a text there is nothing to index; return the
    # same value as the normal path rather than raising KeyError.
    # NOTE(review): presumably such payloads are stream control messages
    # (delete/limit notices) - confirm against the streaming client in use.
    if not ('id' in decoded and 'text' in decoded):
        return True

    tweet_id = decoded['id']
    tweet = decoded['text']

    parser.feed(decoded['source'])
    source = parser.getData()

    # The key can be missing or null, so look it up without subscripting.
    place = decoded.get('place')
    bounding_box = place.get('bounding_box') if place else None
    # First point of the first coordinate ring, or None without a box.
    geo = bounding_box['coordinates'][0][0] if bounding_box else None

    user_location = decoded['user']['location']
    sentiment = ml.classifiers.classify(ml_module_id, [tweet], sandbox=True)

    tweet_indexed = {
        "favorite_count": decoded['favorite_count'],
        "retweeted": decoded['retweeted'],
        "retweet_count": decoded['retweet_count'],
        "in_reply_to_user_id": decoded['in_reply_to_user_id'],
        "favorited": decoded['favorited'],
        "lang": decoded['lang'],
        "tag": tag,
        "sentiment": sentiment.result[0][0],
        "source": source,
        "created_at": decoded['created_at'],
        "text": tweet,
        "text_not_analyzed": tweet,
        "place": place,
        "geo": geo,
        "user_location": user_location,
    }

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(tweet)
    print('')
    engine.index(index_es, type_es, tweet_id, tweet_indexed)
    return True
# Script body: index every account in `accounts` together with the tweets
# that are not stored yet. Expects exactly one command-line argument, which
# is handed to social.api() to build the API client.
timestamp_start = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
start = 'Start: ' + timestamp_start
print(start)

if len(sys.argv) == 2:
    api_param = sys.argv[1]
else:
    raise Exception('Error en cantidad de parametros ingresados!!!')

api = social.api(api_param)

for account in accounts:
    print('Indexing ' + account)
    user = social.GetUser(api, account)
    # Read the public `id` attribute, as the rest of this file does for user
    # objects, instead of reaching into the private `_id` field through
    # __getattribute__; the new name also stops shadowing the builtin id().
    user_id = user.id
    engine.index(INDEX, 'user', user_id, social.userToJSON(user))

    # Fetch only the single stored tweet with the highest id for the account.
    request = {
        "size": 1,
        "sort": [{"id": {"order": "desc"}}],
        "query": {"match": {"user.screen_name": account}},
    }
    docs = engine.search(INDEX, 'tweet', request)
    hits = docs["hits"]["hits"]
    # Its _id becomes since_id for GetTweets; None when nothing is stored.
    since_id = str(hits[0][u'_id']) if hits else None

    tweets = social.GetTweets(api, account, since_id)
    for tweet in tweets:
        print('Tweet> ' + tweet.text)
        engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))
# Script body: print a start banner, build the API client from the single
# command-line argument, then index each configured account and the tweets
# that are not stored for it yet.
timestamp_start = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')
start = 'Start: ' + timestamp_start
print(start)

# Exactly one argument (besides the script name) is accepted.
if len(sys.argv) != 2:
    raise Exception('Error en cantidad de parametros ingresados!!!')
api_param = sys.argv[1]
api = social.api(api_param)

for account in accounts:
    print('Indexing ' + account)

    user = social.GetUser(api, account)
    engine.index(INDEX, 'user', user.id, social.userToJSON(user))

    # Look up the one stored tweet with the highest id for this account.
    request = {
        "size": 1,
        "sort": [{"id": {"order": "desc"}}],
        "query": {"match": {"user.screen_name": account}},
    }
    docs = engine.search(INDEX, 'tweet', request)
    latest = docs["hits"]["hits"]
    # Its _id is passed on as since_id; None when there is no stored tweet.
    since_id = str(latest[0][u'_id']) if len(latest) > 0 else None

    for tweet in social.GetTweets(api, account, since_id):
        print('Tweet> ' + tweet.text)
        engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))