def showTweetsSpaceDistribution(limit=300, mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Load tweets from MongoDB and pass them to plotTweetsInSpaceDistribution.

    limit               -- forwarded to MongoDBHandler.getAllTweets.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    handler = MongoDBHandler(database_name=mongoDBName,
                             collection_name=mongoCollectionName)
    plotTweetsInSpaceDistribution(handler.getAllTweets(limit=limit))
def detectEvents(limit=300, minimalTermPerTweet=MIN_TERM_OCCURENCE, remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW, printEvents=True):
    """Run OptimisedEventDetectorMEDBased over stored tweets and return its events.

    limit                         -- forwarded to MongoDBHandler.getAllTweets.
    minimalTermPerTweet           -- forwarded to the detector's getEvents.
    remove_noise_with_poisson_Law -- forwarded to the detector's getEvents.
    printEvents                   -- when true, showTopEvents(top=10) is called.

    Returns whatever the detector's getEvents returns.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    storedTweets = MongoDBHandler().getAllTweets(limit=limit)
    detector = OptimisedEventDetectorMEDBased(
        storedTweets,
        timeResolution=TIME_RESOLUTION,
        distanceResolution=DISTANCE_RESOLUTION,
        scaleNumber=SCALE_NUMBER,
        minSimilarity=MIN_SIMILARITY)
    detected = detector.getEvents(
        minimalTermPerTweet=minimalTermPerTweet,
        remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)

    # Single-argument parenthesised print gives the same output as the
    # print statement under Python 2.
    rule = "-" * 40
    print("")
    print(rule)
    print("{0} Event detected : ".format(len(detected)))
    print(rule)

    if printEvents:
        detector.showTopEvents(top=10)
    return detected
def showTermSpaceDistributionByTerm(limit=300, term="#shopping", mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Load tweets from MongoDB and pass them, with `term`, to plotTermInSpaceDistribution.

    limit               -- forwarded to MongoDBHandler.getAllTweets.
    term                -- second positional argument of the plot call.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    handler = MongoDBHandler(database_name=mongoDBName,
                             collection_name=mongoCollectionName)
    plotTermInSpaceDistribution(handler.getAllTweets(limit=limit), term)
def showTermSpaceDistributionByOrder(limit=300, topTermOrder=0, mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Load tweets from MongoDB and pass them to plotTermInSpaceDistributionWithOrder.

    limit               -- forwarded to MongoDBHandler.getAllTweets.
    topTermOrder        -- forwarded to the plot call as a keyword argument.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    handler = MongoDBHandler(database_name=mongoDBName,
                             collection_name=mongoCollectionName)
    plotTermInSpaceDistributionWithOrder(handler.getAllTweets(limit=limit),
                                         topTermOrder=topTermOrder)
def showTermOccurenceDistribution(limit=300, mongoDBName='Twitter', mongoCollectionName="tweets", useOnlyHashtags=False):
    """Load tweets from MongoDB and pass them to plotTermOccurencesDistribution.

    limit               -- forwarded to MongoDBHandler.getAllTweets.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    useOnlyHashtags     -- forwarded to the plot call as a keyword argument.
    """
    handler = MongoDBHandler(database_name=mongoDBName,
                             collection_name=mongoCollectionName)
    plotTermOccurencesDistribution(handler.getAllTweets(limit=limit),
                                   useOnlyHashtags=useOnlyHashtags)
def getTweetsFromTwitterAndSave(count=100, export=False, mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Fetch tweets through MyTwitterAPI and save them with MongoDBHandler.saveTweets.

    count               -- forwarded to MyTwitterAPI.getTweets.
    export              -- forwarded to MyTwitterAPI.getTweets.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    # The handler is built before the API client, as in the original.
    handler = MongoDBHandler(database_name=mongoDBName,
                             collection_name=mongoCollectionName)
    twitterClient = MyTwitterAPI("twitter_config_file.txt")
    handler.saveTweets(twitterClient.getTweets(count=count, export=export))
def showTweetsNumberSignal(limit=300, granularity=3600, dyadic=True, mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Load tweets from MongoDB and pass them to plotTweetsApparitionInTime.

    limit               -- forwarded to MongoDBHandler.getAllTweets.
    granularity         -- forwarded to the plot call as a keyword argument.
    dyadic              -- forwarded to the plot call as a keyword argument.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    handler = MongoDBHandler(database_name=mongoDBName,
                             collection_name=mongoCollectionName)
    plotTweetsApparitionInTime(handler.getAllTweets(limit=limit),
                               granularity=granularity,
                               dyadic=dyadic)
def showTermOccurenceSignalByOrder(limit=300, topTermOrder=0, granularity=3600, dyadic=True):
    """Load tweets with a default MongoDBHandler and pass them to plotTermApparitionInTimeWithOrder.

    limit        -- forwarded to MongoDBHandler.getAllTweets.
    topTermOrder -- forwarded to the plot call as a keyword argument.
    granularity  -- forwarded to the plot call as a keyword argument.
    dyadic       -- forwarded to the plot call as a keyword argument.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    storedTweets = MongoDBHandler().getAllTweets(limit=limit)
    plotTermApparitionInTimeWithOrder(storedTweets,
                                      topTermOrder=topTermOrder,
                                      granularity=granularity,
                                      dyadic=dyadic)
def showTermOccurenceSignalByTerm(limit=300, term="#shopping", granularity=3600, dyadic=True):
    """Load tweets with a default MongoDBHandler and pass them, with `term`, to plotTermApparitionInTime.

    limit       -- forwarded to MongoDBHandler.getAllTweets.
    term        -- second positional argument of the plot call.
    granularity -- forwarded to the plot call as a keyword argument.
    dyadic      -- forwarded to the plot call as a keyword argument.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    storedTweets = MongoDBHandler().getAllTweets(limit=limit)
    plotTermApparitionInTime(storedTweets, term,
                             granularity=granularity,
                             dyadic=dyadic)
def showTermOccurenceSignalByTerm(limit=300, term="#shopping", granularity=3600, dyadic=True, mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Load tweets from MongoDB and pass them, with `term`, to plotTermApparitionInTime.

    limit               -- forwarded to MongoDBHandler.getAllTweets.
    term                -- second positional argument of the plot call.
    granularity         -- forwarded to the plot call as a keyword argument.
    dyadic              -- forwarded to the plot call as a keyword argument.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    handler = MongoDBHandler(database_name=mongoDBName,
                             collection_name=mongoCollectionName)
    plotTermApparitionInTime(handler.getAllTweets(limit=limit), term,
                             granularity=granularity,
                             dyadic=dyadic)
def showTermOccurenceSignalByOrder(limit=300, topTermOrder=0, granularity=3600, dyadic=True, mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Load tweets from MongoDB and pass them to plotTermApparitionInTimeWithOrder.

    limit               -- forwarded to MongoDBHandler.getAllTweets.
    topTermOrder        -- forwarded to the plot call as a keyword argument.
    granularity         -- forwarded to the plot call as a keyword argument.
    dyadic              -- forwarded to the plot call as a keyword argument.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)
    # The Mongo settings are consumed by the handler above. The previous code
    # also passed hard-coded mongoDBName/mongoCollectionName literals to the
    # plot helper, which ignored the caller's values and matched no other plot
    # call in this file, so they are no longer forwarded.
    plotTermApparitionInTimeWithOrder(tweets,
                                      topTermOrder=topTermOrder,
                                      granularity=granularity,
                                      dyadic=dyadic)
def getTweetsFromCSVFileAndSave(csvFilePath="D:\\PRJS\\Data\\final.csv", mongoDBName='Twitter', mongoCollectionName="tweetsMehdi"):
    """Hand a CSV file path to MongoDBHandler.saveTweetsFromCSVFile.

    csvFilePath         -- path passed to saveTweetsFromCSVFile.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    MongoDBHandler(
        database_name=mongoDBName,
        collection_name=mongoCollectionName,
    ).saveTweetsFromCSVFile(csvFilePath)
def showTweetsNumberSignal(limit=300, granularity=3600, dyadic=True):
    """Load tweets with a default MongoDBHandler and pass them to plotTweetsApparitionInTime.

    limit       -- forwarded to MongoDBHandler.getAllTweets.
    granularity -- forwarded to the plot call as a keyword argument.
    dyadic      -- forwarded to the plot call as a keyword argument.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    storedTweets = MongoDBHandler().getAllTweets(limit=limit)
    plotTweetsApparitionInTime(storedTweets,
                               granularity=granularity,
                               dyadic=dyadic)
def getTweetsFromJSONRepositoryAndSave(repositoryPath="E:\\tweets"):
    """Hand a repository path to saveTweetsFromJSONRepository on a default MongoDBHandler.

    repositoryPath -- path passed to saveTweetsFromJSONRepository.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    MongoDBHandler().saveTweetsFromJSONRepository(repositoryPath)
def getTweetsFromTwitterAndSave(count=100, export=False):
    """Fetch tweets through MyTwitterAPI and save them with a default MongoDBHandler.

    count  -- forwarded to MyTwitterAPI.getTweets.
    export -- forwarded to MyTwitterAPI.getTweets.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    # The handler is built before the API client, as in the original.
    handler = MongoDBHandler()
    twitterClient = MyTwitterAPI("twitter_config_file.txt")
    handler.saveTweets(twitterClient.getTweets(count=count, export=export))
def showTermSpaceDistributionByTerm(limit=300, term="#shopping"):
    """Load tweets with a default MongoDBHandler and pass them, with `term`, to plotTermInSpaceDistribution.

    limit -- forwarded to MongoDBHandler.getAllTweets.
    term  -- second positional argument of the plot call.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    storedTweets = MongoDBHandler().getAllTweets(limit=limit)
    plotTermInSpaceDistribution(storedTweets, term)
def showTermSpaceDistributionByOrder(limit=300, topTermOrder=0):
    """Load tweets with a default MongoDBHandler and pass them to plotTermInSpaceDistributionWithOrder.

    limit        -- forwarded to MongoDBHandler.getAllTweets.
    topTermOrder -- forwarded to the plot call as a keyword argument.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    storedTweets = MongoDBHandler().getAllTweets(limit=limit)
    plotTermInSpaceDistributionWithOrder(storedTweets,
                                         topTermOrder=topTermOrder)
def showTweetsSpaceDistribution(limit=300):
    """Load tweets with a default MongoDBHandler and pass them to plotTweetsInSpaceDistribution.

    limit -- forwarded to MongoDBHandler.getAllTweets.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    storedTweets = MongoDBHandler().getAllTweets(limit=limit)
    plotTweetsInSpaceDistribution(storedTweets)
def detectEvents(limit=300, similarityType=MED_SIM, minimalTermPerTweet=MIN_TERM_OCCURENCE, remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW, printEvents=True, printInFile="events.txt", useOnlyHashtags=False, mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Detect events in stored tweets, optionally printing them and writing a report file.

    limit                         -- forwarded to MongoDBHandler.getAllTweets.
    similarityType                -- LED_SIM selects LEDSimilarityMatrixBuilder,
                                     MED_SIM selects MEDSimilarityMatrixBuilder
                                     (both wrapped in EventDetector); any other
                                     value selects OptimisedEventDetectorMEDBased.
    minimalTermPerTweet           -- forwarded to the detector's getEvents.
    remove_noise_with_poisson_Law -- forwarded to the detector's getEvents.
    printEvents                   -- when true, showTopEvents is called with
                                     top=len(events).
    printInFile                   -- path of the report file; a falsy value
                                     skips the report.
    useOnlyHashtags               -- forwarded to the builder / detector.
    mongoDBName                   -- database name given to MongoDBHandler.
    mongoCollectionName           -- collection name given to MongoDBHandler.

    Returns whatever the detector's getEvents returns.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    startTime = time.time()
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)

    # Only the detector construction differs between similarity types; the
    # getEvents call is the same for all of them and is issued once below.
    if similarityType == LED_SIM:
        builder = LEDSimilarityMatrixBuilder(
            timeThreshold=TIME_RESOLUTION,
            distanceThreshold=DISTANCE_RESOLUTION,
            useOnlyHashtags=useOnlyHashtags)
        eventDetector = EventDetector(tweets, builder)
    elif similarityType == MED_SIM:
        builder = MEDSimilarityMatrixBuilder(
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY,
            useOnlyHashtags=useOnlyHashtags)
        eventDetector = EventDetector(tweets, builder)
    else:
        eventDetector = OptimisedEventDetectorMEDBased(
            tweets,
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY,
            useOnlyHashtags=useOnlyHashtags)

    events = eventDetector.getEvents(
        minimalTermPerTweet=minimalTermPerTweet,
        remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)

    # Single-argument parenthesised print gives the same output as the
    # print statement under Python 2.
    rule = "-" * 40
    print("")
    print(rule)
    print("{0} Event detected : ".format(len(events)))
    print(rule)

    if printEvents:
        eventDetector.showTopEvents(top=len(events))

    if printInFile:
        # The context manager closes the report even if formatting or
        # writing an event raises; the manual open()/close() pair leaked
        # the handle in that case.
        with open(printInFile, 'w') as txtFile:
            elapsed_time = time.time() - startTime
            txtFile.write("Total elapsed time : {0}s\n".format(elapsed_time))
            txtFile.write(rule + "\n")

            SEPARATOR = "\t|"
            HEADER = "|" + SEPARATOR.join([
                "Median time",
                "estimated duration (s)",
                "mean latitude",
                "mean longitude",
                "radius (m)",
                "user number",
                "tweets number",
                "top hashtags",
            ]) + SEPARATOR + "\n"
            txtFile.write(HEADER)
            txtFile.write(rule + "\n")

            for event in events:
                txtFile.write(eventDetector.getStringOfEvent(event) + "\n")
            # NOTE(review): the original indentation was lost; the closing
            # rule is assumed to be written once after all event rows.
            # Confirm against the intended report layout.
            txtFile.write(rule + "\n")

    return events
def getTweetsFromJSONRepositoryAndSave(repositoryPath="E:\\tweets", mongoDBName='Twitter', mongoCollectionName="tweets"):
    """Hand a repository path to MongoDBHandler.saveTweetsFromJSONRepository.

    repositoryPath      -- path passed to saveTweetsFromJSONRepository.
    mongoDBName         -- database name given to MongoDBHandler.
    mongoCollectionName -- collection name given to MongoDBHandler.
    """
    # NOTE(review): this name is defined more than once in the visible
    # source; within a single module the last definition wins.
    MongoDBHandler(
        database_name=mongoDBName,
        collection_name=mongoCollectionName,
    ).saveTweetsFromJSONRepository(repositoryPath)