def clusterLagDistributionMethod(hdStreamClusteringObject, currentMessageTime): lagDistribution = defaultdict(int) for cluster in hdStreamClusteringObject.clusters.values(): lag = DateTimeAirthematic.getDifferenceInTimeUnits( currentMessageTime, cluster.lastStreamAddedTime, hdStreamClusteringObject. stream_settings['time_unit_in_seconds'].seconds) lagDistribution[str(lag)] += 1 print currentMessageTime, len(hdStreamClusteringObject.clusters) iterationData = { 'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime), 'settings': pprint.pformat(hdStreamClusteringObject.stream_settings), ClusteringParametersEstimation.clusterLagDistributionId: lagDistribution, 'lag_between_streams_added_to_cluster': hdStreamClusteringObject. stream_settings['lag_between_streams_added_to_cluster'] } # print hdStreamClusteringObject.stream_settings['lag_between_streams_added_to_cluster'] FileIO.writeToFileAsJson( iterationData, hdStreamClusteringObject.stream_settings[ '%s_file' % ClusteringParametersEstimation.clusterLagDistributionId])
def updateScore(self, currentOccuranceTime, scoreToUpdate, **stream_settings):
    """Decay the current score by the elapsed time units, add the new score,
    and record the new latest-occurrence time."""
    elapsedUnits = DateTimeAirthematic.getDifferenceInTimeUnits(
        currentOccuranceTime,
        self.latestOccuranceTime,
        stream_settings["time_unit_in_seconds"].seconds)
    decayedScore = exponentialDecay(
        self.score, stream_settings["phrase_decay_coefficient"], elapsedUnits)
    self.score = decayedScore + scoreToUpdate
    self.latestOccuranceTime = currentOccuranceTime
def updateScore(self, currentOccuranceTime, scoreToUpdate, **stream_settings):
    '''Exponentially decay the stored score over the elapsed time units and
    fold in the incoming score; stamp the new occurrence time.'''
    unitsSinceLastSeen = DateTimeAirthematic.getDifferenceInTimeUnits(
        currentOccuranceTime, self.latestOccuranceTime,
        stream_settings['time_unit_in_seconds'].seconds)
    self.score = scoreToUpdate + exponentialDecay(
        self.score,
        stream_settings['phrase_decay_coefficient'],
        unitsSinceLastSeen)
    self.latestOccuranceTime = currentOccuranceTime
def parameterSpecificDataCollectionMethod(estimationObject, message):
    '''Update the lag-between-messages distribution for every phrase in the
    message that has been seen before.

    For each previously-seen phrase, the lag is the number of time units
    between the message timestamp and the phrase's last occurrence; the
    distribution is keyed by the stringified lag.
    '''
    # Loop-invariant: the configured time unit does not change per phrase.
    unitSeconds = estimationObject.twitter_stream_settings['time_unit_in_seconds'].seconds
    for phrase in message.vector:
        # Single dict lookup via .get() instead of `in` test + indexing.
        phraseObject = estimationObject.phraseTextToPhraseObjectMap.get(phrase)
        if phraseObject is None:
            continue
        lag = DateTimeAirthematic.getDifferenceInTimeUnits(
            message.timeStamp, phraseObject.latestOccuranceTime, unitSeconds)
        estimationObject.lagBetweenMessagesDistribution[str(lag)] += 1
def updateForMessage(self, message, updateMethod, **stream_settings):
    '''Apply updateMethod to this stream for a new message, passing the decay
    coefficient and the time elapsed (in time units) since the last message.

    timeDifference stays None when no time unit is configured
    ('time_unit_in_seconds' is None); updateMethod must handle that case.
    '''
    timeDifference = None
    # Fix: identity test `is not None` for the None singleton, not `!= None`.
    if stream_settings['time_unit_in_seconds'] is not None:
        timeDifference = DateTimeAirthematic.getDifferenceInTimeUnits(
            message.timeStamp, self.lastMessageTime,
            stream_settings['time_unit_in_seconds'].seconds)
    updateMethod(
        self, message.vector,
        decayCoefficient=stream_settings['stream_decay_coefficient'],
        timeDifference=timeDifference)
    self.lastMessageTime = message.timeStamp
def dimensionInActivityTimeEstimation(estimationObject, currentMessageTime): phrasesLagDistribution = defaultdict(int) for phraseObject in estimationObject.phraseTextToPhraseObjectMap.itervalues(): lag = DateTimeAirthematic.getDifferenceInTimeUnits(currentMessageTime, phraseObject.latestOccuranceTime, estimationObject.stream_settings['time_unit_in_seconds'].seconds) phrasesLagDistribution[str(lag)] += 1 print currentMessageTime iterationData = { 'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime), 'settings': pprint.pformat(estimationObject.stream_settings), ParameterEstimation.dimensionInActivityTimeId:estimationObject.lagBetweenMessagesDistribution, 'phrases_lag_distribution': phrasesLagDistribution } FileIO.writeToFileAsJson(iterationData, estimationObject.dimensionInActivityTimeFile)
def updateForMessage(self, message, updateMethod, **stream_settings):
    """Run updateMethod for a new message on this stream.

    Computes the elapsed time units since the previous message when a time
    unit is configured; otherwise passes timeDifference=None (updateMethod
    must cope with None). Always advances self.lastMessageTime.
    """
    timeDifference = None
    # Fix: compare against the None singleton with `is not None`, not `!= None`.
    if stream_settings["time_unit_in_seconds"] is not None:
        timeDifference = DateTimeAirthematic.getDifferenceInTimeUnits(
            message.timeStamp,
            self.lastMessageTime,
            stream_settings["time_unit_in_seconds"].seconds,
        )
    updateMethod(
        self,
        message.vector,
        decayCoefficient=stream_settings["stream_decay_coefficient"],
        timeDifference=timeDifference,
    )
    self.lastMessageTime = message.timeStamp
def dimensionInActivityTimeEstimation(estimationObject, currentMessageTime): phrasesLagDistribution = defaultdict(int) for phraseObject in estimationObject.phraseTextToPhraseObjectMap.itervalues(): lag = DateTimeAirthematic.getDifferenceInTimeUnits(currentMessageTime, phraseObject.latestOccuranceTime, estimationObject.stream_settings['time_unit_in_seconds'].seconds) phrasesLagDistribution[str(lag)] += 1 print currentMessageTime iterationData = { 'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime), 'settings': pprint.pformat(estimationObject.stream_settings), ParameterEstimation.dimensionInActivityTimeId:estimationObject.lagBetweenMessagesDistribution, 'phrases_lag_distribution': phrasesLagDistribution } FileIO.writeToFileAsJson(iterationData, estimationObject.dimensionInActivityTimeFile)
def clusterLagDistributionMethod(hdStreamClusteringObject, currentMessageTime): lagDistribution = defaultdict(int) for cluster in hdStreamClusteringObject.clusters.values(): lag = DateTimeAirthematic.getDifferenceInTimeUnits(currentMessageTime, cluster.lastStreamAddedTime, hdStreamClusteringObject.stream_settings['time_unit_in_seconds'].seconds) lagDistribution[str(lag)] += 1 print currentMessageTime, len(hdStreamClusteringObject.clusters) iterationData = { 'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime), 'settings': pprint.pformat(hdStreamClusteringObject.stream_settings), ClusteringParametersEstimation.clusterLagDistributionId: lagDistribution, 'lag_between_streams_added_to_cluster': hdStreamClusteringObject.stream_settings['lag_between_streams_added_to_cluster'] } # print hdStreamClusteringObject.stream_settings['lag_between_streams_added_to_cluster'] FileIO.writeToFileAsJson(iterationData, hdStreamClusteringObject.stream_settings['%s_file' % ClusteringParametersEstimation.clusterLagDistributionId])
def analyzeClusterLag(streamCluster, stream, **stream_settings):
    '''Count, into the shared lag distribution, the time-unit gap between the
    cluster's last stream addition and this stream's last message.'''
    unitSeconds = stream_settings['time_unit_in_seconds'].seconds
    clusterLag = DateTimeAirthematic.getDifferenceInTimeUnits(
        streamCluster.lastStreamAddedTime, stream.lastMessageTime, unitSeconds)
    stream_settings['lag_between_streams_added_to_cluster'][str(clusterLag)] += 1
def parameterSpecificDataCollectionMethod(estimationObject, message):
    '''Record, for each already-seen phrase in the message, the time-unit lag
    since that phrase last occurred, into lagBetweenMessagesDistribution.

    Phrases not yet in phraseTextToPhraseObjectMap are skipped.
    '''
    # Hoist the loop-invariant time-unit setting out of the phrase loop.
    timeUnit = estimationObject.twitter_stream_settings['time_unit_in_seconds'].seconds
    phraseMap = estimationObject.phraseTextToPhraseObjectMap
    for phrase in message.vector:
        # .get() avoids the double lookup of `in` followed by indexing.
        phraseObject = phraseMap.get(phrase)
        if phraseObject is None:
            continue
        lag = DateTimeAirthematic.getDifferenceInTimeUnits(
            message.timeStamp, phraseObject.latestOccuranceTime, timeUnit)
        estimationObject.lagBetweenMessagesDistribution[str(lag)] += 1
def analyzeClusterLag(streamCluster, stream, **stream_settings):
    '''Tally the lag (in configured time units) between when the cluster last
    gained a stream and the stream's most recent message.'''
    lagInUnits = DateTimeAirthematic.getDifferenceInTimeUnits(
        streamCluster.lastStreamAddedTime,
        stream.lastMessageTime,
        stream_settings['time_unit_in_seconds'].seconds)
    lagDistribution = stream_settings['lag_between_streams_added_to_cluster']
    lagDistribution[str(lagInUnits)] += 1