예제 #1
0
def detectEvents(limit=300,
                 similarityType=MED_SIM,
                 minimalTermPerTweet=MIN_TERM_OCCURENCE,
                 remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW,
                 printEvents=True):
    """Load tweets from MongoDB, run an event detector on them, report results.

    Args:
        limit: forwarded to ``MongoDBHandler.getAllTweets`` as ``limit``.
        similarityType: selects the detector. ``LED_SIM`` and ``MED_SIM``
            each pair an ``EventDetector`` with the matching similarity
            matrix builder; any other value falls back to
            ``OptimisedEventDetectorMEDBased``.
        minimalTermPerTweet: forwarded unchanged to ``getEvents``.
        remove_noise_with_poisson_Law: forwarded unchanged to ``getEvents``.
        printEvents: when truthy, calls ``showTopEvents(top=10)`` on the
            detector after the summary banner has been printed.

    Returns:
        Whatever the selected detector's ``getEvents`` returned.
    """
    tweets = MongoDBHandler().getAllTweets(limit=limit)

    # Only the construction of the detector differs between the modes; the
    # getEvents call is the same for all of them and is made once below.
    if similarityType == LED_SIM:
        matrixBuilder = LEDSimilarityMatrixBuilder(
            timeThreshold=TIME_RESOLUTION,
            distanceThreshold=DISTANCE_RESOLUTION)
        eventDetector = EventDetector(tweets, matrixBuilder)
    elif similarityType == MED_SIM:
        matrixBuilder = MEDSimilarityMatrixBuilder(
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY)
        eventDetector = EventDetector(tweets, matrixBuilder)
    else:
        eventDetector = OptimisedEventDetectorMEDBased(
            tweets,
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY)

    events = eventDetector.getEvents(
        minimalTermPerTweet=minimalTermPerTweet,
        remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)

    # Summary banner: blank line, rule, event count, rule.
    rule = "-" * 40
    summary = "{0} Event detected : ".format(len(events))
    for bannerLine in ("", rule, summary, rule):
        print(bannerLine)

    if printEvents:
        eventDetector.showTopEvents(top=10)

    return events
예제 #2
0
def detectEvents(limit=300,
                 similarityType=MED_SIM,
                 minimalTermPerTweet=MIN_TERM_OCCURENCE,
                 remove_noise_with_poisson_Law=REMOVE_NOISE_WITH_POISSON_LAW,
                 printEvents=True,
                 printInFile="events.txt",
                 useOnlyHashtags=False,
                 mongoDBName='Twitter',
                 mongoCollectionName="tweets"):
    """Load tweets from MongoDB, detect events, and report them.

    Args:
        limit: forwarded to ``MongoDBHandler.getAllTweets`` as ``limit``.
        similarityType: selects the detector. ``LED_SIM`` and ``MED_SIM``
            each pair an ``EventDetector`` with the matching similarity
            matrix builder; any other value falls back to
            ``OptimisedEventDetectorMEDBased``.
        minimalTermPerTweet: forwarded unchanged to ``getEvents``.
        remove_noise_with_poisson_Law: forwarded unchanged to ``getEvents``.
        printEvents: when truthy, calls ``showTopEvents`` on the detector
            with ``top`` set to the number of detected events.
        printInFile: path of a text report to (over)write; a falsy value
            (``None``, ``""``, ``False``) skips the report.
        useOnlyHashtags: forwarded to the similarity builder / optimised
            detector constructor.
        mongoDBName: passed to ``MongoDBHandler`` as ``database_name``.
        mongoCollectionName: passed to ``MongoDBHandler`` as
            ``collection_name``.

    Returns:
        Whatever the selected detector's ``getEvents`` returned.
    """
    startingTime = time.time()
    mongoDBHandler = MongoDBHandler(database_name=mongoDBName,
                                    collection_name=mongoCollectionName)
    tweets = mongoDBHandler.getAllTweets(limit=limit)

    # Only the construction of the detector differs between the modes; the
    # getEvents call is the same for all of them and is made once below.
    if similarityType == LED_SIM:
        s = LEDSimilarityMatrixBuilder(timeThreshold=TIME_RESOLUTION,
                                       distanceThreshold=DISTANCE_RESOLUTION,
                                       useOnlyHashtags=useOnlyHashtags)
        eventDetector = EventDetector(tweets, s)
    elif similarityType == MED_SIM:
        s = MEDSimilarityMatrixBuilder(timeResolution=TIME_RESOLUTION,
                                       distanceResolution=DISTANCE_RESOLUTION,
                                       scaleNumber=SCALE_NUMBER,
                                       minSimilarity=MIN_SIMILARITY,
                                       useOnlyHashtags=useOnlyHashtags)
        eventDetector = EventDetector(tweets, s)
    else:
        eventDetector = OptimisedEventDetectorMEDBased(
            tweets,
            timeResolution=TIME_RESOLUTION,
            distanceResolution=DISTANCE_RESOLUTION,
            scaleNumber=SCALE_NUMBER,
            minSimilarity=MIN_SIMILARITY,
            useOnlyHashtags=useOnlyHashtags)

    events = eventDetector.getEvents(
        minimalTermPerTweet=minimalTermPerTweet,
        remove_noise_with_poisson_Law=remove_noise_with_poisson_Law)

    rule = "-" * 40
    print("")
    print(rule)
    print("{0} Event detected : ".format(len(events)))
    print(rule)

    if printEvents:
        eventDetector.showTopEvents(top=len(events))

    if printInFile:
        SEPARATOR = "\t|"
        HEADER = "|" + SEPARATOR.join([
            "Median time", "estimated duration (s)", "mean latitude",
            "mean longitude", "radius (m)", "user number", "tweets number",
            "top hashtags"
        ]) + SEPARATOR + "\n"
        # The context manager closes the file even if formatting or writing
        # an event raises, which the previous manual close() did not.
        with open(printInFile, 'w') as txtFile:
            # Elapsed time covers everything since function entry, including
            # any console output produced above.
            elapsed_time = (time.time() - startingTime)
            txtFile.write("Total elapsed time : {0}s\n".format(elapsed_time))
            txtFile.write(rule + "\n")
            txtFile.write(HEADER)
            txtFile.write(rule + "\n")
            for event in events:
                line = eventDetector.getStringOfEvent(event)
                txtFile.write(line + "\n")
                txtFile.write(rule + "\n")

    return events