def detectEvents(limit=300,
                 similarityType=MED_SIM,
                 minimalTermPerTweet=MIN_TERM_OCCURENCE,
                 remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW,
                 printEvents=True):
    """Fetch tweets from MongoDB, run an event detector on them and report.

    limit                         -- forwarded to MongoDBHandler.getAllTweets.
    similarityType                -- LED_SIM or MED_SIM selects the matching
                                     similarity matrix builder wrapped in an
                                     EventDetector; any other value selects
                                     OptimisedEventDetectorMEDBased.
    minimalTermPerTweet           -- forwarded to the detector's getEvents.
    remove_noise_with_poisson_Law -- forwarded to the detector's getEvents.
    printEvents                   -- when truthy, the detector's
                                     showTopEvents(top=10) is called.

    Returns whatever the chosen detector's getEvents returns.
    """
    # NOTE(review): another detectEvents with a wider signature appears later
    # in this source; if both live in the same module the later definition
    # replaces this one -- confirm which is intended.
    tweets = MongoDBHandler().getAllTweets(limit=limit)

    # Pick the detector; the getEvents call is identical for all of them.
    if similarityType == LED_SIM:
        builder = LEDSimilarityMatrixBuilder(
            timeThreshold=TIME_RESOLUTION,
            distanceThreshold=DISTANCE_RESOLUTION)
        eventDetector = EventDetector(tweets, builder)
    elif similarityType == MED_SIM:
        builder = MEDSimilarityMatrixBuilder(
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY)
        eventDetector = EventDetector(tweets, builder)
    else:
        eventDetector = OptimisedEventDetectorMEDBased(
            tweets,
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY)

    events = eventDetector.getEvents(
        minimalTermPerTweet=minimalTermPerTweet,
        remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)

    # Single-argument print(...) gives the same output under Python 2's print
    # statement and Python 3's print function.
    rule = "-" * 40
    print("")
    print(rule)
    print("{0} Event detected : ".format(len(events)))
    print(rule)

    if printEvents:
        eventDetector.showTopEvents(top=10)
    return events
def detectEvents(limit=300,
                 similarityType=MED_SIM,
                 minimalTermPerTweet=MIN_TERM_OCCURENCE,
                 remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW,
                 printEvents=True,
                 printInFile="events.txt",
                 useOnlyHashtags=False,
                 mongoDBName='Twitter',
                 mongoCollectionName="tweets"):
    """Fetch tweets from MongoDB, detect events, print and optionally save them.

    limit                         -- forwarded to MongoDBHandler.getAllTweets.
    similarityType                -- LED_SIM or MED_SIM selects the matching
                                     similarity matrix builder wrapped in an
                                     EventDetector; any other value selects
                                     OptimisedEventDetectorMEDBased.
    minimalTermPerTweet           -- forwarded to the detector's getEvents.
    remove_noise_with_poisson_Law -- forwarded to the detector's getEvents.
    printEvents                   -- when truthy, showTopEvents is called with
                                     top set to the number of detected events.
    printInFile                   -- path of a text report to (over)write; any
                                     falsy value (None, "") skips the report.
    useOnlyHashtags               -- forwarded to the builder / optimised
                                     detector constructor.
    mongoDBName                   -- database_name given to MongoDBHandler.
    mongoCollectionName           -- collection_name given to MongoDBHandler.

    Returns whatever the chosen detector's getEvents returns.
    """
    startingTime = time.time()

    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)

    # Pick the detector; the getEvents call is identical for all of them.
    if similarityType == LED_SIM:
        builder = LEDSimilarityMatrixBuilder(
            timeThreshold=TIME_RESOLUTION,
            distanceThreshold=DISTANCE_RESOLUTION,
            useOnlyHashtags=useOnlyHashtags)
        eventDetector = EventDetector(tweets, builder)
    elif similarityType == MED_SIM:
        builder = MEDSimilarityMatrixBuilder(
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY,
            useOnlyHashtags=useOnlyHashtags)
        eventDetector = EventDetector(tweets, builder)
    else:
        eventDetector = OptimisedEventDetectorMEDBased(
            tweets,
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY,
            useOnlyHashtags=useOnlyHashtags)

    events = eventDetector.getEvents(
        minimalTermPerTweet=minimalTermPerTweet,
        remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)

    # Single-argument print(...) gives the same output under Python 2's print
    # statement and Python 3's print function.
    rule = "-" * 40
    print("")
    print(rule)
    print("{0} Event detected : ".format(len(events)))
    print(rule)

    if printEvents:
        eventDetector.showTopEvents(top=len(events))

    if printInFile:
        separator = "\t|"
        header = "|" + separator.join([
            "Median time",
            "estimated duration (s)",
            "mean latitude",
            "mean longitude",
            "radius (m)",
            "user number",
            "tweets number",
            "top hashtags",
        ]) + separator + "\n"

        # The context manager closes the file even if formatting or writing
        # an event raises part-way through the report.
        with open(printInFile, 'w') as txtFile:
            # Measured from function entry, so it covers fetching, detection
            # and the console output above.
            elapsed_time = time.time() - startingTime
            txtFile.write("Total elapsed time : {0}s\n".format(elapsed_time))
            txtFile.write(rule + "\n")
            txtFile.write(header)
            txtFile.write(rule + "\n")
            for event in events:
                txtFile.write(eventDetector.getStringOfEvent(event) + "\n")
            txtFile.write(rule + "\n")

    return events