Exemple #1
0
def worker(api, friends):
    """Index each friend and that friend's tweets, logging start and finish.

    For every friend the user document is indexed first. The 'tweet' type is
    then searched for a single hit (size 1, sorted by ``id`` descending) whose
    ``user.screen_name`` matches the friend. If a hit exists, its ``_id`` is
    passed as a string to ``social.GetTweets`` as ``since_id``; otherwise
    ``None`` is passed. Every tweet returned is indexed under its own id.

    Args:
        api: Handle forwarded unchanged to ``social.GetTweets``.
        friends: Iterable of objects exposing ``id`` and ``screen_name``.

    Returns:
        None.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    # The (name, event) tuple repr is part of the log line format, e.g.
    # "('Thread-1', 'Launched'):2020-01-01 12:00:00".
    thread_name = threading.current_thread().name

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(str((thread_name, 'Launched')) + ':'
          + datetime.now().strftime(time_format))

    for friend in friends:
        engine.index(INDEX, 'user', friend.id, social.userToJSON(friend))

        latest_tweet_query = {
            "size": 1,
            "sort": [{"id": {"order": "desc"}}],
            "query": {"match": {"user.screen_name": friend.screen_name}},
        }
        hits = engine.search(INDEX, 'tweet', latest_tweet_query)["hits"]["hits"]
        # No indexed tweet for this friend yet -> no lower bound is passed.
        since_id = str(hits[0]['_id']) if hits else None

        for tweet in social.GetTweets(api, friend.screen_name, since_id):
            engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))

    print(str((thread_name, 'Finishing')) + ':'
          + datetime.now().strftime(time_format))
Exemple #2
0
def _log_thread_event(event):
    """Print the current thread's name and `event`, followed by a timestamp."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # The tuple repr is kept so the line reads "('<thread>', '<event>'):<time>".
    print(str((threading.current_thread().name, event)) + ':' + stamp)


def _latest_indexed_tweet_id(screen_name):
    """Return the `_id` (as str) of the top 'tweet' hit for `screen_name`, or None."""
    request = {
        "size": 1,
        "sort": [{"id": {"order": "desc"}}],
        "query": {"match": {"user.screen_name": screen_name}},
    }
    hits = engine.search(INDEX, 'tweet', request)["hits"]["hits"]
    if not hits:
        return None
    return str(hits[0]['_id'])


def worker(api, friends):
    """Index the given friends and their tweets, logging launch and finish.

    Each friend is indexed as a 'user' document. The id of the top matching
    'tweet' hit for the friend's screen name (if any) is then handed to
    ``social.GetTweets`` as ``since_id``, and every tweet it returns is
    indexed as a 'tweet' document keyed by the tweet's id.

    Args:
        api: Handle forwarded unchanged to ``social.GetTweets``.
        friends: Iterable of objects exposing ``id`` and ``screen_name``.

    Returns:
        None.
    """
    _log_thread_event('Launched')

    for friend in friends:
        engine.index(INDEX, 'user', friend.id, social.userToJSON(friend))

        since_id = _latest_indexed_tweet_id(friend.screen_name)
        for tweet in social.GetTweets(api, friend.screen_name, since_id):
            engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))

    _log_thread_event('Finishing')
Exemple #3
0
    def on_data(self, data):
        """Decode one JSON message, classify its text and index the result.

        The message's ``source`` field is fed through the module-level
        ``parser``, its ``text`` is classified with ``ml.classifiers.classify``,
        and a flattened document is passed to ``engine.index`` using the
        message's ``id`` as the document id. The text is also printed,
        followed by a blank line.

        Args:
            data: JSON-encoded message string.

        Returns:
            True, always. Presumably this tells the streaming client to keep
            listening; confirm against the streaming library in use.

        Raises:
            KeyError: If a required field (``id``, ``source``, ``text``,
                ``user`` and the copied counters/flags) is missing.
        """
        # Twitter returns data in JSON format - we need to decode it first
        decoded = json.loads(data)
        tweet_id = decoded['id']

        parser.feed(decoded['source'])
        source = parser.getData()

        tweet = decoded['text']

        # Use .get() so a payload without a 'place' key yields None instead of
        # raising KeyError before the presence check can run.
        place = decoded.get('place')
        geo = None
        if place is not None and 'bounding_box' in place:
            # NOTE(review): [0][0] looks like the first point of the first
            # ring of the bounding box. Confirm against the payload schema.
            geo = place['bounding_box']['coordinates'][0][0]

        user_location = decoded['user']['location']
        sentiment = ml.classifiers.classify(ml_module_id, [tweet],
                                            sandbox=True)
        tweet_indexed = {
            "favorite_count": decoded['favorite_count'],
            "retweeted": decoded['retweeted'],
            "retweet_count": decoded['retweet_count'],
            "in_reply_to_user_id": decoded['in_reply_to_user_id'],
            "favorited": decoded['favorited'],
            "lang": decoded['lang'],
            "tag": tag,
            "sentiment": sentiment.result[0][0],
            "source": source,
            "created_at": decoded['created_at'],
            "text": tweet,
            # Same text stored twice; presumably the index mapping analyzes
            # only one of the two fields. Confirm against the mapping.
            "text_not_analyzed": tweet,
            "place": place,
            "geo": geo,
            "user_location": user_location
        }

        print(tweet)
        print('')
        engine.index(index_es, type_es, tweet_id, tweet_indexed)
        return True
Exemple #4
0
    def on_data(self, data):
        """Handle one raw JSON message: parse, classify, print and index it.

        Steps, in order: decode ``data`` as JSON; run the ``source`` field
        through the module-level ``parser``; read the optional place and its
        bounding box; classify the text with ``ml.classifiers.classify``;
        build the document; print the text and a blank line; pass the
        document to ``engine.index`` keyed by the message ``id``.

        Args:
            data: JSON-encoded message string.

        Returns:
            True, always. Presumably this means "keep the stream open";
            verify against the streaming client this listener is attached to.

        Raises:
            KeyError: If any required field is absent from the message.
        """
        # Twitter returns data in JSON format - we need to decode it first
        decoded = json.loads(data)
        doc_id = decoded['id']

        parser.feed(decoded['source'])
        source = parser.getData()

        text = decoded['text']

        # The original indexed decoded['place'] before testing for the key,
        # so the test could never help. .get() makes a missing key equal None.
        place = decoded.get('place')
        if place is not None and 'bounding_box' in place:
            geo = place['bounding_box']['coordinates'][0][0]
        else:
            geo = None

        user_location = decoded['user']['location']
        sentiment = ml.classifiers.classify(ml_module_id, [text], sandbox=True)
        tweet_indexed = {
            "favorite_count": decoded['favorite_count'],
            "retweeted": decoded['retweeted'],
            "retweet_count": decoded['retweet_count'],
            "in_reply_to_user_id": decoded['in_reply_to_user_id'],
            "favorited": decoded['favorited'],
            "lang": decoded['lang'],
            "tag": tag,
            "sentiment": sentiment.result[0][0],
            "source": source,
            "created_at": decoded['created_at'],
            "text": text,
            "text_not_analyzed": text,
            "place": place,
            "geo": geo,
            "user_location": user_location,
        }

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(text)
        print('')
        engine.index(index_es, type_es, doc_id, tweet_indexed)
        return True
Exemple #5
0
# Script: index each configured account and its tweets.
# Expects exactly one command-line argument, which is handed to social.api().

# Announce when the run started (local time).
timestamp_start = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
start = 'Start: ' + timestamp_start
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(start)

# Exactly one argument is required; anything else aborts the run.
if len(sys.argv) != 2:
    raise Exception('Error en cantidad de parametros ingresados!!!')
api_param = sys.argv[1]

api = social.api(api_param)

for account in accounts:
    print('Indexing ' + account)
    user = social.GetUser(api, account)
    # Reads the same private `_id` attribute as before, without the explicit
    # dunder call and without shadowing the builtin `id`.
    # NOTE(review): confirm whether the public `id` attribute is equivalent.
    user_id = user._id
    engine.index(INDEX, 'user', user_id, social.userToJSON(user))

    # Top 'tweet' hit for this account: size 1, sorted by `id` descending.
    request = {
        "size": 1,
        "sort": [{"id": {"order": "desc"}}],
        "query": {"match": {"user.screen_name": account}},
    }

    docs = engine.search(INDEX, 'tweet', request)
    hits = docs["hits"]["hits"]
    # No hit means nothing is indexed for this account, so no since_id.
    since_id = str(hits[0]['_id']) if hits else None

    tweets = social.GetTweets(api, account, since_id)

    for tweet in tweets:
        print('Tweet>  ' + tweet.text)
        engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))
Exemple #6
0
# Script: index every account in `accounts` together with its tweets.
# Takes exactly one command-line argument, which is forwarded to social.api().

timestamp_start = datetime.fromtimestamp(time.time()).strftime(
    '%Y-%m-%d %H:%M:%S')
start = 'Start: ' + timestamp_start
print(start)

# Guard clause: bail out unless exactly one argument was supplied.
if len(sys.argv) != 2:
    raise Exception('Error en cantidad de parametros ingresados!!!')
api_param = sys.argv[1]

api = social.api(api_param)

for account in accounts:
    print('Indexing ' + account)
    user = social.GetUser(api, account)
    id = user.id
    engine.index(INDEX, 'user', id, social.userToJSON(user))

    # Ask for one 'tweet' hit only: the one with the highest `id` whose
    # user.screen_name matches this account.
    request = {
        "size": 1,
        "sort": [
            {"id": {"order": "desc"}},
        ],
        "query": {
            "match": {"user.screen_name": account},
        },
    }

    docs = engine.search(INDEX, 'tweet', request)
    hits = docs["hits"]["hits"]
    # The top hit's _id becomes since_id; no hit means None.
    since_id = str(hits[0]['_id']) if len(hits) > 0 else None

    tweets = social.GetTweets(api, account, since_id)

    for tweet in tweets:
        print('Tweet>  ' + tweet.text)
        engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))