Beispiel #1
0
 def clusterLagDistributionMethod(hdStreamClusteringObject,
                                  currentMessageTime):
     """Write a histogram of per-cluster lag for the current message time.

     For every cluster in ``hdStreamClusteringObject.clusters`` the value
     returned by ``DateTimeAirthematic.getDifferenceInTimeUnits`` for
     ``currentMessageTime`` and the cluster's ``lastStreamAddedTime`` is
     counted in a histogram keyed by ``str(lag)``.

     The histogram, the formatted timestamp, the pretty-printed settings and
     the ``'lag_between_streams_added_to_cluster'`` setting are handed to
     ``FileIO.writeToFileAsJson`` together with the settings entry named
     ``'<clusterLagDistributionId>_file'``.

     Args:
         hdStreamClusteringObject: object exposing ``clusters`` (supports
             ``values()`` and ``len()``) and a ``stream_settings`` mapping.
         currentMessageTime: time value the lag is measured against.
     """
     settings = hdStreamClusteringObject.stream_settings
     lagDistribution = defaultdict(int)
     for cluster in hdStreamClusteringObject.clusters.values():
         lag = DateTimeAirthematic.getDifferenceInTimeUnits(
             currentMessageTime, cluster.lastStreamAddedTime,
             settings['time_unit_in_seconds'].seconds)
         lagDistribution[str(lag)] += 1
     # Single-argument print(...) emits the same "<time> <count>" text on
     # Python 2 and Python 3; the bare print statement only parses on 2.
     print('%s %s' % (currentMessageTime,
                      len(hdStreamClusteringObject.clusters)))
     iterationData = {
         'time_stamp':
         getStringRepresentationForTweetTimestamp(currentMessageTime),
         'settings':
         pprint.pformat(settings),
         ClusteringParametersEstimation.clusterLagDistributionId:
         lagDistribution,
         'lag_between_streams_added_to_cluster':
         settings['lag_between_streams_added_to_cluster'],
     }
     outputFileKey = (
         '%s_file' % ClusteringParametersEstimation.clusterLagDistributionId)
     FileIO.writeToFileAsJson(iterationData, settings[outputFileKey])
 def updateScore(self, currentOccuranceTime, scoreToUpdate, **stream_settings):
     """Decay the stored score to ``currentOccuranceTime`` and add to it.

     The time difference between ``currentOccuranceTime`` and
     ``self.latestOccuranceTime`` is obtained from
     ``DateTimeAirthematic.getDifferenceInTimeUnits``. ``self.score`` is then
     replaced by ``exponentialDecay(score, coefficient, difference)`` plus
     ``scoreToUpdate``, and ``self.latestOccuranceTime`` is advanced.

     Args:
         currentOccuranceTime: time of the occurrence being recorded.
         scoreToUpdate: amount added on top of the decayed score.
         **stream_settings: must contain ``"time_unit_in_seconds"`` (an object
             with a ``.seconds`` attribute) and ``"phrase_decay_coefficient"``.
     """
     previousOccuranceTime = self.latestOccuranceTime
     secondsPerUnit = stream_settings["time_unit_in_seconds"].seconds
     elapsedUnits = DateTimeAirthematic.getDifferenceInTimeUnits(
         currentOccuranceTime, previousOccuranceTime, secondsPerUnit
     )
     decayedScore = exponentialDecay(
         self.score, stream_settings["phrase_decay_coefficient"], elapsedUnits
     )
     self.score = decayedScore + scoreToUpdate
     self.latestOccuranceTime = currentOccuranceTime
 def updateScore(self, currentOccuranceTime, scoreToUpdate,
                 **stream_settings):
     """Apply decay for the elapsed time, then add ``scoreToUpdate``.

     Steps:
       1. Ask ``DateTimeAirthematic.getDifferenceInTimeUnits`` for the
          difference between ``currentOccuranceTime`` and
          ``self.latestOccuranceTime`` using the ``.seconds`` of the
          ``'time_unit_in_seconds'`` setting.
       2. Pass the current score, the ``'phrase_decay_coefficient'`` setting
          and that difference to ``exponentialDecay``.
       3. Store the result plus ``scoreToUpdate`` in ``self.score`` and record
          ``currentOccuranceTime`` as ``self.latestOccuranceTime``.
     """
     unitLength = stream_settings['time_unit_in_seconds'].seconds
     unitsSinceLastOccurance = DateTimeAirthematic.getDifferenceInTimeUnits(
         currentOccuranceTime, self.latestOccuranceTime, unitLength)
     scoreAfterDecay = exponentialDecay(
         self.score,
         stream_settings['phrase_decay_coefficient'],
         unitsSinceLastOccurance)
     self.score = scoreAfterDecay + scoreToUpdate
     self.latestOccuranceTime = currentOccuranceTime
Beispiel #4
0
 def parameterSpecificDataCollectionMethod(estimationObject, message):
     """Count the lag since the last occurrence of each known phrase.

     Every phrase in ``message.vector`` that is already a key of
     ``estimationObject.phraseTextToPhraseObjectMap`` contributes one count to
     ``estimationObject.lagBetweenMessagesDistribution`` under ``str(lag)``,
     where ``lag`` is what ``DateTimeAirthematic.getDifferenceInTimeUnits``
     returns for ``message.timeStamp`` and the phrase object's
     ``latestOccuranceTime``. Phrases not in the map are skipped.
     """
     for phraseText in message.vector:
         # Phrases with no recorded object contribute nothing.
         if phraseText not in estimationObject.phraseTextToPhraseObjectMap:
             continue
         knownPhrase = estimationObject.phraseTextToPhraseObjectMap[phraseText]
         secondsPerUnit = (
             estimationObject
             .twitter_stream_settings['time_unit_in_seconds'].seconds)
         lagKey = str(DateTimeAirthematic.getDifferenceInTimeUnits(
             message.timeStamp, knownPhrase.latestOccuranceTime,
             secondsPerUnit))
         estimationObject.lagBetweenMessagesDistribution[lagKey] += 1
 def updateForMessage(self, message, updateMethod, **stream_settings):
     """Apply ``updateMethod`` for ``message`` and record its timestamp.

     When the ``'time_unit_in_seconds'`` setting is not ``None`` the time
     difference between ``message.timeStamp`` and ``self.lastMessageTime`` is
     obtained from ``DateTimeAirthematic.getDifferenceInTimeUnits``; otherwise
     ``None`` is passed on as the difference.

     Args:
         message: object exposing ``timeStamp`` and ``vector``.
         updateMethod: callable invoked as ``updateMethod(self,
             message.vector, decayCoefficient=..., timeDifference=...)``.
         **stream_settings: must contain ``'time_unit_in_seconds'`` and
             ``'stream_decay_coefficient'``.

     Raises:
         KeyError: if either required setting is missing.
     """
     timeUnit = stream_settings['time_unit_in_seconds']
     timeDifference = None
     # Identity test: comparing with ``!= None`` would call a custom __ne__.
     if timeUnit is not None:
         timeDifference = DateTimeAirthematic.getDifferenceInTimeUnits(
             message.timeStamp, self.lastMessageTime, timeUnit.seconds)
     updateMethod(
         self,
         message.vector,
         decayCoefficient=stream_settings['stream_decay_coefficient'],
         timeDifference=timeDifference)
     self.lastMessageTime = message.timeStamp
 def dimensionInActivityTimeEstimation(estimationObject, currentMessageTime):
     """Write the phrase-lag distributions for the current message time.

     For each phrase object in ``estimationObject.phraseTextToPhraseObjectMap``
     the value returned by ``DateTimeAirthematic.getDifferenceInTimeUnits`` for
     ``currentMessageTime`` and the phrase's ``latestOccuranceTime`` is counted
     in a histogram keyed by ``str(lag)``.

     That histogram is passed to ``FileIO.writeToFileAsJson`` along with the
     formatted timestamp, the pretty-printed settings and
     ``estimationObject.lagBetweenMessagesDistribution``; the target is
     ``estimationObject.dimensionInActivityTimeFile``.

     Args:
         estimationObject: object exposing ``phraseTextToPhraseObjectMap``,
             ``stream_settings``, ``lagBetweenMessagesDistribution`` and
             ``dimensionInActivityTimeFile``.
         currentMessageTime: time value the lag is measured against.
     """
     phrasesLagDistribution = defaultdict(int)
     # values() exists on both Python 2 and 3 dicts; itervalues() is 2-only.
     for phraseObject in estimationObject.phraseTextToPhraseObjectMap.values():
         lag = DateTimeAirthematic.getDifferenceInTimeUnits(
             currentMessageTime, phraseObject.latestOccuranceTime,
             estimationObject.stream_settings['time_unit_in_seconds'].seconds)
         phrasesLagDistribution[str(lag)] += 1
     # Parenthesised single-argument form prints identically on Python 2 and 3.
     print(currentMessageTime)
     iterationData = {
         'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime),
         'settings': pprint.pformat(estimationObject.stream_settings),
         ParameterEstimation.dimensionInActivityTimeId: estimationObject.lagBetweenMessagesDistribution,
         'phrases_lag_distribution': phrasesLagDistribution,
     }
     FileIO.writeToFileAsJson(iterationData, estimationObject.dimensionInActivityTimeFile)
 def updateForMessage(self, message, updateMethod, **stream_settings):
     """Run ``updateMethod`` on ``message.vector`` and remember the timestamp.

     The time difference handed to ``updateMethod`` is ``None`` when the
     ``"time_unit_in_seconds"`` setting is ``None``; otherwise it is whatever
     ``DateTimeAirthematic.getDifferenceInTimeUnits`` returns for
     ``message.timeStamp``, ``self.lastMessageTime`` and the setting's
     ``.seconds``.

     Args:
         message: object exposing ``timeStamp`` and ``vector``.
         updateMethod: callable invoked with ``self``, ``message.vector`` and
             the keyword arguments ``decayCoefficient`` and ``timeDifference``.
         **stream_settings: must contain ``"time_unit_in_seconds"`` and
             ``"stream_decay_coefficient"``.

     Raises:
         KeyError: if either required setting is missing.
     """
     timeDifference = None
     # ``is not None`` is the identity check; ``!= None`` can be overridden
     # by the compared object's __ne__.
     if stream_settings["time_unit_in_seconds"] is not None:
         timeDifference = DateTimeAirthematic.getDifferenceInTimeUnits(
             message.timeStamp, self.lastMessageTime, stream_settings["time_unit_in_seconds"].seconds
         )
     updateMethod(
         self,
         message.vector,
         decayCoefficient=stream_settings["stream_decay_coefficient"],
         timeDifference=timeDifference,
     )
     self.lastMessageTime = message.timeStamp
 def dimensionInActivityTimeEstimation(estimationObject, currentMessageTime):
     """Collect per-phrase lag counts and write them out with the settings.

     A histogram keyed by ``str(lag)`` is built over the values of
     ``estimationObject.phraseTextToPhraseObjectMap``, where ``lag`` is the
     result of ``DateTimeAirthematic.getDifferenceInTimeUnits`` for
     ``currentMessageTime`` and each phrase object's ``latestOccuranceTime``.

     The histogram, the formatted timestamp, the pretty-printed
     ``stream_settings`` and ``lagBetweenMessagesDistribution`` are given to
     ``FileIO.writeToFileAsJson`` with
     ``estimationObject.dimensionInActivityTimeFile`` as the destination.
     ``currentMessageTime`` is also printed to stdout.
     """
     settings = estimationObject.stream_settings
     phrasesLagDistribution = defaultdict(int)
     # Use values(): itervalues() does not exist on Python 3 dicts.
     for phraseObject in estimationObject.phraseTextToPhraseObjectMap.values():
         lag = DateTimeAirthematic.getDifferenceInTimeUnits(
             currentMessageTime,
             phraseObject.latestOccuranceTime,
             settings['time_unit_in_seconds'].seconds)
         phrasesLagDistribution[str(lag)] += 1
     # print(x) with one argument behaves the same on Python 2 and Python 3.
     print(currentMessageTime)
     iterationData = {
         'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime),
         'settings': pprint.pformat(settings),
         ParameterEstimation.dimensionInActivityTimeId: estimationObject.lagBetweenMessagesDistribution,
         'phrases_lag_distribution': phrasesLagDistribution,
     }
     FileIO.writeToFileAsJson(iterationData, estimationObject.dimensionInActivityTimeFile)
    def clusterLagDistributionMethod(hdStreamClusteringObject, currentMessageTime):
        """Write the distribution of cluster lags at ``currentMessageTime``.

        Each cluster in ``hdStreamClusteringObject.clusters`` adds one count
        under ``str(lag)``, where ``lag`` is what
        ``DateTimeAirthematic.getDifferenceInTimeUnits`` returns for
        ``currentMessageTime`` and the cluster's ``lastStreamAddedTime``.

        The counts are passed to ``FileIO.writeToFileAsJson`` together with
        the formatted timestamp, the pretty-printed settings and the
        ``'lag_between_streams_added_to_cluster'`` setting. The destination is
        the settings entry ``'<clusterLagDistributionId>_file'``. The message
        time and the number of clusters are printed to stdout.
        """
        streamSettings = hdStreamClusteringObject.stream_settings
        lagDistribution = defaultdict(int)
        for cluster in hdStreamClusteringObject.clusters.values():
            lag = DateTimeAirthematic.getDifferenceInTimeUnits(
                currentMessageTime,
                cluster.lastStreamAddedTime,
                streamSettings['time_unit_in_seconds'].seconds)
            lagDistribution[str(lag)] += 1
        # One pre-formatted argument keeps the "<time> <count>" output the
        # same on Python 2 and 3; the print statement is a syntax error on 3.
        print('%s %s' % (currentMessageTime, len(hdStreamClusteringObject.clusters)))
        iterationData = {
            'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime),
            'settings': pprint.pformat(streamSettings),
            ClusteringParametersEstimation.clusterLagDistributionId: lagDistribution,
            'lag_between_streams_added_to_cluster': streamSettings['lag_between_streams_added_to_cluster'],
        }
        FileIO.writeToFileAsJson(
            iterationData,
            streamSettings['%s_file' % ClusteringParametersEstimation.clusterLagDistributionId])
 def analyzeClusterLag(streamCluster, stream, **stream_settings):
     """Count the lag between a cluster's last addition and a stream's last message.

     The lag is the value ``DateTimeAirthematic.getDifferenceInTimeUnits``
     returns for ``streamCluster.lastStreamAddedTime`` and
     ``stream.lastMessageTime``. The counter stored under
     ``'lag_between_streams_added_to_cluster'`` in ``stream_settings`` is
     incremented in place at key ``str(lag)``.
     """
     clusterTime = streamCluster.lastStreamAddedTime
     streamTime = stream.lastMessageTime
     secondsPerUnit = stream_settings['time_unit_in_seconds'].seconds
     lag = DateTimeAirthematic.getDifferenceInTimeUnits(
         clusterTime, streamTime, secondsPerUnit)
     # The counter object is shared with the caller, so the update is visible
     # outside this call even though stream_settings itself is a fresh dict.
     lagCounts = stream_settings['lag_between_streams_added_to_cluster']
     lagCounts[str(lag)] += 1
 def parameterSpecificDataCollectionMethod(estimationObject, message):
     """Update the message-lag distribution for phrases already being tracked.

     For each phrase of ``message.vector`` found in
     ``estimationObject.phraseTextToPhraseObjectMap``, the result of
     ``DateTimeAirthematic.getDifferenceInTimeUnits`` for ``message.timeStamp``
     and the phrase object's ``latestOccuranceTime`` is stringified and used as
     the key incremented in ``estimationObject.lagBetweenMessagesDistribution``.
     Untracked phrases are ignored.
     """
     for candidate in message.vector:
         if candidate not in estimationObject.phraseTextToPhraseObjectMap:
             continue  # nothing recorded for this phrase yet
         trackedPhrase = estimationObject.phraseTextToPhraseObjectMap[candidate]
         timeUnit = estimationObject.twitter_stream_settings['time_unit_in_seconds']
         lag = DateTimeAirthematic.getDifferenceInTimeUnits(
             message.timeStamp, trackedPhrase.latestOccuranceTime, timeUnit.seconds)
         estimationObject.lagBetweenMessagesDistribution[str(lag)] += 1
Beispiel #12
0
 def analyzeClusterLag(streamCluster, stream, **stream_settings):
     """Record one observation of cluster-to-stream lag.

     Calls ``DateTimeAirthematic.getDifferenceInTimeUnits`` with
     ``streamCluster.lastStreamAddedTime``, ``stream.lastMessageTime`` and the
     ``.seconds`` of the ``'time_unit_in_seconds'`` setting, then increments
     the entry keyed by ``str(lag)`` in the
     ``'lag_between_streams_added_to_cluster'`` setting.
     """
     lagKey = str(
         DateTimeAirthematic.getDifferenceInTimeUnits(
             streamCluster.lastStreamAddedTime,
             stream.lastMessageTime,
             stream_settings['time_unit_in_seconds'].seconds,
         )
     )
     distribution = stream_settings['lag_between_streams_added_to_cluster']
     distribution[lagKey] += 1