Code Example #1
File: twitter_docker.py  Project: pajowu/nostradamIQ
    def on_data(self, data):
        global countLoc, countAll, countAll_intervall, countLoc_intervall, outputgeo, nowDateTime, currentKeyDateTime

        # update nowDateTime:
        nowDateTime = getCurrentDateKey()

        try:
            tweet = json.loads(data)
            print('@%s tweeted: %s\nPlace: %s (%s)\n' % ( tweet['user']['screen_name'], tweet['text'], tweet['place'], tweet['coordinates']))
            countAll += 1
            countAll_intervall += 1
            # convert to and write as .geojson // returns None if no geoInfo is provided
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                with open(outputgeo, 'a+') as outPgeo:
                    json.dump(geoJson, outPgeo)
                    if nowDateTime == currentKeyDateTime:
                        outPgeo.write(',\n')
                    else:
                        outPgeo.write('\n')
                countLoc += 1
                countLoc_intervall += 1
            if countAll%100 == 0:
                print "Saw {0} tweets; {1} of them had location information!\n".format(countAll, countLoc)

        except: pass

        return True
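
These on_data callbacks come from a tweepy StreamListener subclass. A minimal sketch of how such a listener might be wired up and started, assuming tweepy < 4.0; the class name GeoStreamListener, the credential variables, and the track keywords are placeholders, not names taken from the project:

import tweepy

# Placeholder OAuth credentials (assumed to be defined elsewhere).
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

listener = GeoStreamListener()          # hypothetical class defining on_data above
stream = tweepy.Stream(auth, listener)  # tweepy < 4.0 constructor
# on_data is invoked once per incoming tweet matching the filter.
stream.filter(track=["flood", "earthquake", "storm"], languages=["en", "de"])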
Code Example #2
 def handle_tweet(self, data):
     tweet = json.loads(data)
     print("\n\n")
     print('@%s tweeted: %s\nPlace: %s (%s)\n' % ( tweet['user']['screen_name'], tweet['text'], tweet['place'], tweet['coordinates']))
     self.countAll += 1
     self.countAll_intervall += 1
     # convert to and write as .geojson // returns None if no geoInfo is provided
     geoJson = format2geoJSON(tweet)
     if geoJson is not None:
         with open(self.outputgeo, 'a+') as outPgeo:
             json.dump(geoJson, outPgeo)
             outPgeo.write(',')
         self.countLoc += 1
         self.countLoc_intervall += 1
Code Example #3
    def on_data(self, data):
        global countLoc, countAll

        try:
            tweet = json.loads(data)

            if countLoc >= CUTOFF: return False  # stop the stream; exit() here would be swallowed by the bare except below
            """
            # only show english & german tweets with geo location and or place defined: TODO
            if (("coordinates" in tweet) or ("place" in tweet)): # and ("lang" in tweet["user"]) and (tweet["user"]["lang"] == "en" or tweet["user"]["lang"] == "de"):
                if (tweet["coordinates"] == None):
                    print( '@%s tweeted: %s\nPlace: %s\n' % ( tweet['user']['screen_name'], tweet['text'], tweet["place"]) )
                elif (tweet["place"] == None):
                    print( '@%s tweeted: %s\nlat, lng: %s\n' % ( tweet['user']['screen_name'], tweet['text'], tweet["coordinates"] ) )
                else:
                    print( '@%s tweeted: %s\nPlace, lat, lng: %s, %s\n' % ( tweet['user']['screen_name'], tweet['text'], tweet["place"], tweet["coordinates"]) )
            """

            print('@%s tweeted: %s\nPlace: %s (%s)\n' %
                  (tweet['user']['screen_name'], tweet['text'], tweet['place'],
                   tweet['coordinates']))
            countAll += 1
            # write to .txt file
            if WRITE2TXT:
                with open(outputfile, 'a+') as outP:
                    outP.write(str(tweet))
                    outP.write('\n')

            # convert to and write as .geoJSON:
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                # TODO write in Redis Proxy instance
                with open(outputgeo, 'a+') as outPgeo:
                    json.dump(geoJson, outPgeo)
                    outPgeo.write(',\n')
                countLoc += 1
            # notification:
            if countAll % 100 == 0:
                print "Saw {0} tweets; {1} of them had location information!\n".format(
                    countAll, countLoc)

        except:
            pass

        return True
Code Example #4
 def handle_tweet(self, data):
     tweet = json.loads(data)
     print("\n\n")
     print('@%s tweeted: %s\nPlace: %s (%s)\n' %
           (tweet['user']['screen_name'], tweet['text'], tweet['place'],
            tweet['coordinates']))
     self.countAll += 1
     self.countAll_intervall += 1
     # convert to and write as .geojson // returns None if no geoInfo is provided
     geoJson = format2geoJSON(tweet)
     if geoJson is not None:
         with open(self.outputgeo, 'a+') as outPgeo:
             json.dump(geoJson, outPgeo)
             outPgeo.write(',')
         self.countLoc += 1
         self.countLoc_intervall += 1
Code Example #5
    def on_data(self, data):
        global countLoc, countAll

        try:
            tweet = json.loads(data)

            if countLoc >= CUTOFF: return False  # stop the stream; exit() here would be swallowed by the bare except below

            """
            # only show english & german tweets with geo location and or place defined: TODO
            if (("coordinates" in tweet) or ("place" in tweet)): # and ("lang" in tweet["user"]) and (tweet["user"]["lang"] == "en" or tweet["user"]["lang"] == "de"):
                if (tweet["coordinates"] == None):
                    print( '@%s tweeted: %s\nPlace: %s\n' % ( tweet['user']['screen_name'], tweet['text'], tweet["place"]) )
                elif (tweet["place"] == None):
                    print( '@%s tweeted: %s\nlat, lng: %s\n' % ( tweet['user']['screen_name'], tweet['text'], tweet["coordinates"] ) )
                else:
                    print( '@%s tweeted: %s\nPlace, lat, lng: %s, %s\n' % ( tweet['user']['screen_name'], tweet['text'], tweet["place"], tweet["coordinates"]) )
            """

            print('@%s tweeted: %s\nPlace: %s (%s)\n' % ( tweet['user']['screen_name'], tweet['text'], tweet['place'], tweet['coordinates']))
            countAll += 1
            # write to .txt file
            if WRITE2TXT:
                with open(outputfile, 'a+') as outP:
                    outP.write(str(tweet))
                    outP.write('\n')

            # convert to and write as .geoJSON:
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                # TODO write in Redis Proxy instance
                with open(outputgeo, 'a+') as outPgeo:
                    json.dump(geoJson, outPgeo)
                    outPgeo.write(',\n')
                countLoc += 1
            # notification:
            if countAll%100 == 0:
                print "Saw {0} tweets; {1} of them had location information!\n".format(countAll, countLoc)

        except: pass

        return True
Code Example #6
File: twitter_docker.py  Project: kkoci/nostradamIQ
    def on_data(self, data):
        global countLoc, countAll, countAll_intervall, countLoc_intervall, outputgeo, nowDateTime, currentKeyDateTime

        try:
            if str(nowDateTime) == str(currentKeyDateTime): # Changes every hour, so that we publish hourly
                tweet = json.loads(data)
                print("\n\n")
                print('@%s tweeted: %s\nPlace: %s (%s)\n' % ( tweet['user']['screen_name'], tweet['text'], tweet['place'], tweet['coordinates']))
                countAll += 1
                countAll_intervall += 1
                # convert to and write as .geojson // returns None if no geoInfo is provided
                geoJson = format2geoJSON(tweet)
                if geoJson is not None:
                    with open(outputgeo, 'a+') as outPgeo:
                        json.dump(geoJson, outPgeo)
                        outPgeo.write(',')
                    countLoc += 1
                    countLoc_intervall += 1
                # Update time
                nowDateTime = getCurrentDateKey() 


            else:
                print "WRITING TO NEW FILE!"
                if os.path.isfile(outputgeo):

                    print"CHECK: is already a file"
                    
                    # write last line of old one:
                    with open(outputgeo, 'a+') as outPgeo:
                        outPgeo.write(']}')
                    # publish old one for one week
                    with open(outputgeo, 'r') as uploadFile:
                        # use a blob in redis to keep structure for better reading in the app
                        uploadFileJSON = json.load(uploadFile)
                    # serialize to a JSON string before storing (setex keeps the legacy redis-py 2.x argument order: name, value, time)
                    REDIS.setex(outputgeo, json.dumps(uploadFileJSON), 60*60*24*7) # a week in seconds
                    # stats_ARRAY_HOUR_DATE ->  {"All_Tweets_seen":countAll, "Location_Tweets_seen":countLoc, "All_Tweets_Intervall":countAll_intervall, "Location_Tweets_Intervall":countLoc_intervall}
                    REDIS.set("stats_{0}_{1}_{2}".format(searchArray, currentKeyDateTime.split(':')[0], currentKeyDateTime.split(':')[1]), json.dumps({"All_Tweets_seen":countAll, "Location_Tweets_seen":countLoc, "All_Tweets_Intervall":countAll_intervall, "Location_Tweets_Intervall":countLoc_intervall}))
                    countAll_intervall = 0 
                    countLoc_intervall = 0             
                    # Delete old file?
                    if DELETE_OLD: 
                        os.remove(outputgeo)

                print "CHECKPOINT 1"
                # update KeyDateTime and nowDateTime:
                currentKeyDateTime = getCurrentDateKey()
                nowDateTime = getCurrentDateKey()        
                # set new filename:
                outputgeo = outputgeo_tpl % (searchArray, currentKeyDateTime.split(':')[0], currentKeyDateTime.split(':')[1])
                print "FILENAME: %s\n" % (outputgeo)

                # write first line of new one
                with open(outputgeo, 'a+') as outPgeo:
                    outPgeo.write('{"type":"FeatureCollection","features":[')

                # Handle the tweet
                tweet = json.loads(data)
                print("\n\n")
                print('@%s tweeted: %s\nPlace: %s (%s)\n' % ( tweet['user']['screen_name'], tweet['text'], tweet['place'], tweet['coordinates']))
                countAll += 1
                countAll_intervall += 1
                # convert to and write as .geojson // returns None if no geoInfo is provided
                geoJson = format2geoJSON(tweet)
                if geoJson is not None:
                    with open(outputgeo, 'a+') as outPgeo:
                        json.dump(geoJson, outPgeo)
                        outPgeo.write(',')
                    countLoc += 1
                    countLoc_intervall += 1

            # Print Notification
            if countAll%100 == 0:
                print "Saw {0} tweets; {1} of them had location information!\n".format(countAll, countLoc)

        except: pass

        return True
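
The hourly rotation above hinges on getCurrentDateKey(), which these examples split on ':' into a date part and an hour part. A sketch of what such a helper could look like, under that assumption (the project's real implementation is not shown here):

from datetime import datetime

def getCurrentDateKey():
    # Assumed "date:hour" layout: the key changes once per hour and
    # split(':')[0] / split(':')[1] yield the date and the hour.
    return datetime.utcnow().strftime('%Y-%m-%d:%H')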
Code Example #7
def searchTweets(keywordLists=None,
                 keywords=None,
                 language=None,
                 geo_lat=None,
                 geo_lng=None,
                 geo_rad=None,
                 timeStart=None,
                 timeStop=None,
                 no_entities=False,
                 no_retweets=False,
                 no_links=False,
                 no_answers=False):
    tweetsFound = []
    tweetsCount = 0
    tso = TwitterSearchOrder()
    # remove all restrictions from previos calls:
    tso.remove_all_filters()
    # this makes sure no videos/pics are commented
    tso.set_keywords([
        "-video", "-pic", "-foto", "-funny", "-clip", "-vid", "-movie", "-song"
    ])  # append more synonyms and other languages TODO
    try:
        # reuse the tso configured above (re-creating it here would discard the negative keywords)
        if keywordLists is not None:
            for keywordList in keywordLists:
                tso.add_keyword(keywordList, or_operator=True)
        if keywords is not None:
            for keyword in keywords:
                tso.add_keyword(keyword, or_operator=True)
        if language is not None:
            tso.set_language(str(language))
        if geo_rad is not None and geo_lat is not None and geo_lng is not None:
            tso.set_geocode(
                geo_lat, geo_lng, geo_rad, imperial_metric=True
            )  # must be of format: str(lat,lng,radius) + 'km'/'mi'
        if timeStart is not None:
            tso.add_keyword(
                'since:' +
                str(timeStart))  # time has to be of the format: YYYY-MM-DD
        if timeStop is not None:
            tso.add_keyword(
                'until:' +
                str(timeStop))  # time has to be of the format: YYYY-MM-DD
        if no_entities:
            tso.set_include_entities(False)
        if no_retweets:
            pass  #tso.set_include_rts(False) #TODO
        if no_links:
            pass  #TODO
        if no_answers:
            pass  #tso.set_exclude_replies(True) #TODO

        # Maybe use sentiment analysis? // tso.set_negative_attitude_filter()

        ts = TwitterSearch(consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=access_token,
                           access_token_secret=access_token_secret)

        for tweet in ts.search_tweets_iterable(tso, callback=my_callback):
            #tweetsFound.append(tweet)
            tweetsCount += 1
            # write to .txt file
            with open(outputfile, 'a+') as outP:
                outP.write(str(tweet))
                outP.write('\n')
            # convert and write as geoJSON:
            with open(outputgeo, 'a+') as outPgeo:
                outPgeo.write(format2geoJSON(tweet))
            print('@%s tweeted: %s\n' %
                  (tweet['user']['screen_name'], tweet['text']))

    except TwitterSearchException as e:
        print(e)
    except requests.exceptions.SSLError as e:
        print(e)

    return tweetsCount
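
A hypothetical call to searchTweets with illustrative keyword, geo, and date values (outputfile, outputgeo, and the OAuth credentials are assumed to be defined at module level, as the function body implies):

count = searchTweets(keywords=["flood", "earthquake"],
                     language="en",
                     geo_lat=52.52, geo_lng=13.40, geo_rad=25,
                     timeStart="2016-01-01", timeStop="2016-01-31",
                     no_retweets=True)
print("Matched %d tweets" % count)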
Code Example #8
def searchTweets(keywordLists=None, keywords=None, language=None, geo_lat=None, geo_lng=None, geo_rad=None, timeStart=None, timeStop=None, no_entities=False, no_retweets=False, no_links=False, no_answers=False):
        tweetsFound = []
        tweetsCount = 0
        tso = TwitterSearchOrder()
        # remove all restrictions from previos calls:
        tso.remove_all_filters()
        # this makes sure no videos/pics are commented
        tso.set_keywords(["-video", "-pic", "-foto", "-funny", "-clip", "-vid", "-movie", "-song"]) # append more synonyms and other languages TODO
        try:
            # reuse the tso configured above (re-creating it here would discard the negative keywords)
            if keywordLists is not None:
                for keywordList in keywordLists:
                    tso.add_keyword(keywordList, or_operator=True)
            if keywords is not None:
                for keyword in keywords:
                    tso.add_keyword(keyword, or_operator=True)
            if language is not None:
                tso.set_language(str(language))
            if geo_rad is not None and geo_lat is not None and geo_lng is not None:
                tso.set_geocode(geo_lat, geo_lng, geo_rad, imperial_metric=True) # must be of format: str(lat,lng,radius) + 'km'/'mi'
            if timeStart is not None:
                tso.add_keyword('since:' + str(timeStart)) # time has to be of the format: YYYY-MM-DD
            if timeStop is not None:
                tso.add_keyword('until:' + str(timeStop)) # time has to be of the format: YYYY-MM-DD
            if no_entities:
                tso.set_include_entities(False) 
            if no_retweets:
                pass #tso.set_include_rts(False) #TODO
            if no_links:
                pass #TODO
            if no_answers:
                pass #tso.set_exclude_replies(True) #TODO
            
            # Maybe use sentiment analysis? // tso.set_negative_attitude_filter()

            ts = TwitterSearch(
                consumer_key = consumer_key,
                consumer_secret = consumer_secret,
                access_token = access_token,
                access_token_secret = access_token_secret)

            for tweet in ts.search_tweets_iterable(tso, callback=my_callback):
                #tweetsFound.append(tweet)
                tweetsCount += 1
                # write to .txt file
                with open(outputfile, 'a+') as outP:
                    outP.write(str(tweet))
                    outP.write('\n')
                # convert and write as geoJSON:
                with open(outputgeo, 'a+') as outPgeo:
                    outPgeo.write(format2geoJSON(tweet))
                print( '@%s tweeted: %s\n' % ( tweet['user']['screen_name'], tweet['text'] ) )

        except TwitterSearchException as e:
            print(e)
        except requests.exceptions.SSLError as e:
            print(e)
            
        return tweetsCount
Code Example #9
    def on_data(self, data):
        global countLoc, countAll, countAll_intervall, countLoc_intervall, outputgeo, nowDateTime, currentKeyDateTime

        try:
            if str(nowDateTime) == str(
                    currentKeyDateTime
            ):  # Changes every hour, so that we publish hourly
                tweet = json.loads(data)
                print("\n\n")
                print('@%s tweeted: %s\nPlace: %s (%s)\n' %
                      (tweet['user']['screen_name'], tweet['text'],
                       tweet['place'], tweet['coordinates']))
                countAll += 1
                countAll_intervall += 1
                # convert to and write as .geojson // returns None if no geoInfo is provided
                geoJson = format2geoJSON(tweet)
                if geoJson is not None:
                    with open(outputgeo, 'a+') as outPgeo:
                        json.dump(geoJson, outPgeo)
                        outPgeo.write(',')
                    countLoc += 1
                    countLoc_intervall += 1
                # Update time
                nowDateTime = getCurrentDateKey()

            else:
                print "WRITING TO NEW FILE!"
                if os.path.isfile(outputgeo):

                    print "CHECK: is already a file"

                    # write last line of old one:
                    with open(outputgeo, 'a+') as outPgeo:
                        outPgeo.write(']}')
                    # publish old one for one week
                    with open(outputgeo, 'r') as uploadFile:
                        # use a blob in redis to keep structure for better reading in the app
                        uploadFileJSON = json.load(uploadFile)
                    # serialize to a JSON string before storing (setex keeps the legacy redis-py 2.x argument order: name, value, time)
                    REDIS.setex(outputgeo, json.dumps(uploadFileJSON),
                                60 * 60 * 24 * 7)  # a week in seconds
                    # stats_ARRAY_HOUR_DATE ->  {"All_Tweets_seen":countAll, "Location_Tweets_seen":countLoc, "All_Tweets_Intervall":countAll_intervall, "Location_Tweets_Intervall":countLoc_intervall}
                    REDIS.set(
                        "stats_{0}_{1}_{2}".format(
                            searchArray,
                            currentKeyDateTime.split(':')[0],
                            currentKeyDateTime.split(':')[1]),
                        json.dumps({
                            "All_Tweets_seen": countAll,
                            "Location_Tweets_seen": countLoc,
                            "All_Tweets_Intervall": countAll_intervall,
                            "Location_Tweets_Intervall": countLoc_intervall
                        }))
                    countAll_intervall = 0
                    countLoc_intervall = 0
                    # Delete old file?
                    if DELETE_OLD:
                        os.remove(outputgeo)

                print "CHECKPOINT 1"
                # update KeyDateTime and nowDateTime:
                currentKeyDateTime = getCurrentDateKey()
                nowDateTime = getCurrentDateKey()
                # set new filename:
                outputgeo = outputgeo_tpl % (searchArray,
                                             currentKeyDateTime.split(':')[0],
                                             currentKeyDateTime.split(':')[1])
                print "FILENAME: %s\n" % (outputgeo)

                # write first line of new one
                with open(outputgeo, 'a+') as outPgeo:
                    outPgeo.write('{"type":"FeatureCollection","features":[')

                # Handle the tweet
                tweet = json.loads(data)
                print("\n\n")
                print('@%s tweeted: %s\nPlace: %s (%s)\n' %
                      (tweet['user']['screen_name'], tweet['text'],
                       tweet['place'], tweet['coordinates']))
                countAll += 1
                countAll_intervall += 1
                # convert to and write as .geojson // returns None if no geoInfo is provided
                geoJson = format2geoJSON(tweet)
                if geoJson is not None:
                    with open(outputgeo, 'a+') as outPgeo:
                        json.dump(geoJson, outPgeo)
                        outPgeo.write(',')
                    countLoc += 1
                    countLoc_intervall += 1

            # Print Notification
            if countAll % 100 == 0:
                print "Saw {0} tweets; {1} of them had location information!\n".format(
                    countAll, countLoc)

        except:
            pass

        return True
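
All of the streaming examples rely on format2geoJSON(tweet) returning either a JSON-serializable GeoJSON Feature or None when the tweet carries no geo information. A minimal sketch of such a helper, assuming only the Twitter API's native 'coordinates' field is used (the project's actual implementation is not shown and may also handle 'place'):

def format2geoJSON(tweet):
    # Twitter delivers 'coordinates' as a GeoJSON Point:
    # {"type": "Point", "coordinates": [lng, lat]}
    coords = tweet.get('coordinates')
    if not coords:
        return None
    return {
        "type": "Feature",
        "geometry": coords,
        "properties": {
            "user": tweet['user']['screen_name'],
            "text": tweet['text'],
            "created_at": tweet.get('created_at'),
        },
    }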